summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/btree_cache.c 22
-rw-r--r-- fs/bcachefs/btree_key_cache.c 21
-rw-r--r-- fs/bcachefs/btree_types.h 4
-rw-r--r-- fs/bcachefs/fs.c 2
-rw-r--r-- fs/bcachefs/sysfs.c 2
-rw-r--r-- fs/btrfs/super.c 2
-rw-r--r-- fs/buffer.c 125
-rw-r--r-- fs/dax.c 24
-rw-r--r-- fs/erofs/utils.c 19
-rw-r--r-- fs/exec.c 4
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/extents_status.c 23
-rw-r--r-- fs/ext4/inode.c 14
-rw-r--r-- fs/ext4/move_extent.c 11
-rw-r--r-- fs/ext4/readpage.c 14
-rw-r--r-- fs/ext4/super.c 13
-rw-r--r-- fs/f2fs/super.c 31
-rw-r--r-- fs/gfs2/aops.c 2
-rw-r--r-- fs/gfs2/bmap.c 48
-rw-r--r-- fs/gfs2/glock.c 19
-rw-r--r-- fs/gfs2/main.c 6
-rw-r--r-- fs/gfs2/meta_io.c 61
-rw-r--r-- fs/gfs2/quota.c 62
-rw-r--r-- fs/gfs2/quota.h 3
-rw-r--r-- fs/hugetlbfs/inode.c 84
-rw-r--r-- fs/iomap/buffered-io.c 57
-rw-r--r-- fs/jbd2/journal.c 29
-rw-r--r-- fs/kernfs/file.c 49
-rw-r--r-- fs/kernfs/mount.c 2
-rw-r--r-- fs/mbcache.c 22
-rw-r--r-- fs/mpage.c 3
-rw-r--r-- fs/nfs/nfs42xattr.c 87
-rw-r--r-- fs/nfs/super.c 21
-rw-r--r-- fs/nfsd/filecache.c 23
-rw-r--r-- fs/nfsd/netns.h 4
-rw-r--r-- fs/nfsd/nfs4state.c 19
-rw-r--r-- fs/nfsd/nfscache.c 31
-rw-r--r-- fs/nilfs2/mdt.c 66
-rw-r--r-- fs/nilfs2/page.c 76
-rw-r--r-- fs/nilfs2/page.h 11
-rw-r--r-- fs/nilfs2/segment.c 7
-rw-r--r-- fs/ntfs/aops.c 255
-rw-r--r-- fs/ntfs/file.c 89
-rw-r--r-- fs/ntfs3/file.c 31
-rw-r--r-- fs/ocfs2/aops.c 19
-rw-r--r-- fs/proc/kcore.c 3
-rw-r--r-- fs/proc/root.c 2
-rw-r--r-- fs/proc/task_mmu.c 733
-rw-r--r-- fs/quota/dquot.c 17
-rw-r--r-- fs/reiserfs/inode.c 80
-rw-r--r-- fs/super.c 35
-rw-r--r-- fs/ubifs/super.c 21
-rw-r--r-- fs/ufs/balloc.c 20
-rw-r--r-- fs/ufs/inode.c 25
-rw-r--r-- fs/ufs/util.c 34
-rw-r--r-- fs/ufs/util.h 10
-rw-r--r-- fs/userfaultfd.c 96
-rw-r--r-- fs/xfs/xfs_buf.c 24
-rw-r--r-- fs/xfs/xfs_buf.h 2
-rw-r--r-- fs/xfs/xfs_icache.c 26
-rw-r--r-- fs/xfs/xfs_mount.c 4
-rw-r--r-- fs/xfs/xfs_mount.h 2
-rw-r--r-- fs/xfs/xfs_qm.c 27
-rw-r--r-- fs/xfs/xfs_qm.h 2
64 files changed, 1668 insertions, 1014 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 82cf243aa288..5e5858191905 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -285,8 +285,7 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_cache.shrink);
+ struct bch_fs *c = shrink->private_data;
struct btree_cache *bc = &c->btree_cache;
struct btree *b, *t;
unsigned long nr = sc->nr_to_scan;
@@ -384,8 +383,7 @@ out_nounlock:
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_cache.shrink);
+ struct bch_fs *c = shrink->private_data;
struct btree_cache *bc = &c->btree_cache;
if (bch2_btree_shrinker_disabled)
@@ -400,7 +398,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
struct btree *b;
unsigned i, flags;
- unregister_shrinker(&bc->shrink);
+ shrinker_free(bc->shrink);
/* vfree() can allocate memory: */
flags = memalloc_nofs_save();
@@ -454,6 +452,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
int bch2_fs_btree_cache_init(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
+ struct shrinker *shrink;
unsigned i;
int ret = 0;
@@ -473,12 +472,15 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
mutex_init(&c->verify_lock);
- bc->shrink.count_objects = bch2_btree_cache_count;
- bc->shrink.scan_objects = bch2_btree_cache_scan;
- bc->shrink.seeks = 4;
- ret = register_shrinker(&bc->shrink, "%s/btree_cache", c->name);
- if (ret)
+ shrink = shrinker_alloc(0, "%s/btree_cache", c->name);
+ if (!shrink)
goto err;
+ bc->shrink = shrink;
+ shrink->count_objects = bch2_btree_cache_count;
+ shrink->scan_objects = bch2_btree_cache_scan;
+ shrink->seeks = 4;
+ shrink->private_data = c;
+ shrinker_register(shrink);
return 0;
err:
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 29a0b566a4fe..f9a5e38a085b 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -834,8 +834,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_key_cache.shrink);
+ struct bch_fs *c = shrink->private_data;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bucket_table *tbl;
struct bkey_cached *ck, *t;
@@ -932,8 +931,7 @@ out:
static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_key_cache.shrink);
+ struct bch_fs *c = shrink->private_data;
struct btree_key_cache *bc = &c->btree_key_cache;
long nr = atomic_long_read(&bc->nr_keys) -
atomic_long_read(&bc->nr_dirty);
@@ -953,7 +951,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
int cpu;
#endif
- unregister_shrinker(&bc->shrink);
+ shrinker_free(bc->shrink);
mutex_lock(&bc->lock);
@@ -1027,6 +1025,7 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+ struct shrinker *shrink;
#ifdef __KERNEL__
bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
@@ -1039,11 +1038,15 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->table_init_done = true;
- bc->shrink.seeks = 0;
- bc->shrink.count_objects = bch2_btree_key_cache_count;
- bc->shrink.scan_objects = bch2_btree_key_cache_scan;
- if (register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name))
+ shrink = shrinker_alloc(0, "%s/btree_key_cache", c->name);
+ if (!shrink)
return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ bc->shrink = shrink;
+ shrink->seeks = 0;
+ shrink->count_objects = bch2_btree_key_cache_count;
+ shrink->scan_objects = bch2_btree_key_cache_scan;
+ shrink->private_data = c;
+ shrinker_register(shrink);
return 0;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index c9a38e254949..bc6714d88925 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -163,7 +163,7 @@ struct btree_cache {
unsigned used;
unsigned reserve;
atomic_t dirty;
- struct shrinker shrink;
+ struct shrinker *shrink;
/*
* If we need to allocate memory for a new btree node and that
@@ -321,7 +321,7 @@ struct btree_key_cache {
bool table_init_done;
struct list_head freed_pcpu;
struct list_head freed_nonpcpu;
- struct shrinker shrink;
+ struct shrinker *shrink;
unsigned shrink_iter;
struct btree_key_cache_freelist __percpu *pcpu_freed;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 6642b88c41a0..a2a5133fb6b5 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1904,7 +1904,7 @@ got_sb:
sb->s_flags |= SB_POSIXACL;
#endif
- sb->s_shrink.seeks = 0;
+ sb->s_shrink->seeks = 0;
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index eb764b9a4629..397116966a7c 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -494,7 +494,7 @@ STORE(bch2_fs)
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
+ c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
}
if (attr == &sysfs_btree_wakeup)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6ecf78d09694..f638dc339693 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1472,7 +1472,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
error = -EBUSY;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
+ shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name,
s->s_id);
btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data);
diff --git a/fs/buffer.c b/fs/buffer.c
index 12e9a71c693d..657a62bab73d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -282,13 +282,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
} while (tmp != bh);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- /*
- * If all of the buffers are uptodate then we can set the page
- * uptodate.
- */
- if (folio_uptodate)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, folio_uptodate);
return;
still_busy:
@@ -915,16 +909,12 @@ int remove_inode_buffers(struct inode *inode)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
- bool retry)
+ gfp_t gfp)
{
struct buffer_head *bh, *head;
- gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
long offset;
struct mem_cgroup *memcg, *old_memcg;
- if (retry)
- gfp |= __GFP_NOFAIL;
-
/* The folio lock pins the memcg */
memcg = folio_memcg(folio);
old_memcg = set_active_memcg(memcg);
@@ -967,7 +957,11 @@ EXPORT_SYMBOL_GPL(folio_alloc_buffers);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry)
{
- return folio_alloc_buffers(page_folio(page), size, retry);
+ gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
+ if (retry)
+ gfp |= __GFP_NOFAIL;
+
+ return folio_alloc_buffers(page_folio(page), size, gfp);
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
@@ -1043,20 +1037,11 @@ grow_dev_page(struct block_device *bdev, sector_t block,
struct buffer_head *bh;
sector_t end_block;
int ret = 0;
- gfp_t gfp_mask;
-
- gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
-
- /*
- * XXX: __getblk_slow() can not really deal with failure and
- * will endlessly loop on improvised global reclaim. Prefer
- * looping in the allocator rather than here, at least that
- * code knows what it's doing.
- */
- gfp_mask |= __GFP_NOFAIL;
folio = __filemap_get_folio(inode->i_mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
bh = folio_buffers(folio);
if (bh) {
@@ -1069,7 +1054,10 @@ grow_dev_page(struct block_device *bdev, sector_t block,
goto failed;
}
- bh = folio_alloc_buffers(folio, size, true);
+ ret = -ENOMEM;
+ bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT);
+ if (!bh)
+ goto failed;
/*
* Link the folio to the buffers and initialise them. Take the
@@ -1420,33 +1408,36 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__find_get_block);
-/*
- * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
- * which corresponds to the passed block_device, block and size. The
- * returned buffer has its reference count incremented.
+/**
+ * bdev_getblk - Get a buffer_head in a block device's buffer cache.
+ * @bdev: The block device.
+ * @block: The block number.
+ * @size: The size of buffer_heads for this @bdev.
+ * @gfp: The memory allocation flags to use.
*
- * __getblk_gfp() will lock up the machine if grow_dev_page's
- * try_to_free_buffers() attempt is failing. FIXME, perhaps?
+ * Return: The buffer head, or NULL if memory could not be allocated.
*/
-struct buffer_head *
-__getblk_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp)
+struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
- might_sleep();
- if (bh == NULL)
- bh = __getblk_slow(bdev, block, size, gfp);
- return bh;
+ might_alloc(gfp);
+ if (bh)
+ return bh;
+
+ return __getblk_slow(bdev, block, size, gfp);
}
-EXPORT_SYMBOL(__getblk_gfp);
+EXPORT_SYMBOL(bdev_getblk);
/*
* Do async read-ahead on a buffer..
*/
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
- struct buffer_head *bh = __getblk(bdev, block, size);
+ struct buffer_head *bh = bdev_getblk(bdev, block, size,
+ GFP_NOWAIT | __GFP_MOVABLE);
+
if (likely(bh)) {
bh_readahead(bh, REQ_RAHEAD);
brelse(bh);
@@ -1470,7 +1461,17 @@ struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
- struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
+ struct buffer_head *bh;
+
+ gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+ /*
+ * Prefer looping in the allocator rather than here, at least that
+ * code knows what it's doing.
+ */
+ gfp |= __GFP_NOFAIL;
+
+ bh = bdev_getblk(bdev, block, size, gfp);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
@@ -1640,12 +1641,13 @@ EXPORT_SYMBOL(block_invalidate_folio);
* block_dirty_folio() via private_lock. try_to_free_buffers
* is already excluded via the folio lock.
*/
-void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
- unsigned long b_state)
+struct buffer_head *create_empty_buffers(struct folio *folio,
+ unsigned long blocksize, unsigned long b_state)
{
struct buffer_head *bh, *head, *tail;
+ gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL;
- head = folio_alloc_buffers(folio, blocksize, true);
+ head = folio_alloc_buffers(folio, blocksize, gfp);
bh = head;
do {
bh->b_state |= b_state;
@@ -1667,13 +1669,8 @@ void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
}
folio_attach_private(folio, head);
spin_unlock(&folio->mapping->private_lock);
-}
-EXPORT_SYMBOL(folio_create_empty_buffers);
-void create_empty_buffers(struct page *page,
- unsigned long blocksize, unsigned long b_state)
-{
- folio_create_empty_buffers(page_folio(page), blocksize, b_state);
+ return head;
}
EXPORT_SYMBOL(create_empty_buffers);
@@ -1768,13 +1765,15 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
struct inode *inode,
unsigned int b_state)
{
+ struct buffer_head *bh;
+
BUG_ON(!folio_test_locked(folio));
- if (!folio_buffers(folio))
- folio_create_empty_buffers(folio,
- 1 << READ_ONCE(inode->i_blkbits),
- b_state);
- return folio_buffers(folio);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ 1 << READ_ONCE(inode->i_blkbits), b_state);
+ return bh;
}
/*
@@ -2425,12 +2424,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (!nr) {
/*
- * All buffers are uptodate - we can set the folio uptodate
- * as well. But not if get_block() returned an error.
+ * All buffers are uptodate or get_block() returned an
+ * error when trying to map them - we can finish the read.
*/
- if (!page_error)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, !page_error);
return 0;
}
@@ -2676,10 +2673,8 @@ int block_truncate_page(struct address_space *mapping,
return PTR_ERR(folio);
bh = folio_buffers(folio);
- if (!bh) {
- folio_create_empty_buffers(folio, blocksize, 0);
- bh = folio_buffers(folio);
- }
+ if (!bh)
+ bh = create_empty_buffers(folio, blocksize, 0);
/* Find the buffer that contains "offset" */
offset = offset_in_folio(folio, from);
diff --git a/fs/dax.c b/fs/dax.c
index 8fafecbe42b1..3380b43cb6bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -412,23 +412,23 @@ static struct page *dax_busy_page(void *entry)
return NULL;
}
-/*
- * dax_lock_page - Lock the DAX entry corresponding to a page
- * @page: The page whose entry we want to lock
+/**
+ * dax_lock_folio - Lock the DAX entry corresponding to a folio
+ * @folio: The folio whose entry we want to lock
*
* Context: Process context.
- * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * Return: A cookie to pass to dax_unlock_folio() or 0 if the entry could
* not be locked.
*/
-dax_entry_t dax_lock_page(struct page *page)
+dax_entry_t dax_lock_folio(struct folio *folio)
{
XA_STATE(xas, NULL, 0);
void *entry;
- /* Ensure page->mapping isn't freed while we look at it */
+ /* Ensure folio->mapping isn't freed while we look at it */
rcu_read_lock();
for (;;) {
- struct address_space *mapping = READ_ONCE(page->mapping);
+ struct address_space *mapping = READ_ONCE(folio->mapping);
entry = NULL;
if (!mapping || !dax_mapping(mapping))
@@ -447,11 +447,11 @@ dax_entry_t dax_lock_page(struct page *page)
xas.xa = &mapping->i_pages;
xas_lock_irq(&xas);
- if (mapping != page->mapping) {
+ if (mapping != folio->mapping) {
xas_unlock_irq(&xas);
continue;
}
- xas_set(&xas, page->index);
+ xas_set(&xas, folio->index);
entry = xas_load(&xas);
if (dax_is_locked(entry)) {
rcu_read_unlock();
@@ -467,10 +467,10 @@ dax_entry_t dax_lock_page(struct page *page)
return (dax_entry_t)entry;
}
-void dax_unlock_page(struct page *page, dax_entry_t cookie)
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
{
- struct address_space *mapping = page->mapping;
- XA_STATE(xas, &mapping->i_pages, page->index);
+ struct address_space *mapping = folio->mapping;
+ XA_STATE(xas, &mapping->i_pages, folio->index);
if (S_ISCHR(mapping->host->i_mode))
return;
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 4256a85719a1..5dea308764b4 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -264,19 +264,24 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
return freed;
}
-static struct shrinker erofs_shrinker_info = {
- .scan_objects = erofs_shrink_scan,
- .count_objects = erofs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *erofs_shrinker_info;
int __init erofs_init_shrinker(void)
{
- return register_shrinker(&erofs_shrinker_info, "erofs-shrinker");
+ erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
+ if (!erofs_shrinker_info)
+ return -ENOMEM;
+
+ erofs_shrinker_info->count_objects = erofs_shrink_count;
+ erofs_shrinker_info->scan_objects = erofs_shrink_scan;
+
+ shrinker_register(erofs_shrinker_info);
+
+ return 0;
}
void erofs_exit_shrinker(void)
{
- unregister_shrinker(&erofs_shrinker_info);
+ shrinker_free(erofs_shrinker_info);
}
#endif /* !CONFIG_EROFS_FS_ZIP */
diff --git a/fs/exec.c b/fs/exec.c
index 6518e33ea813..4aa19b24f281 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -713,7 +713,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
- vma, new_start, length, false))
+ vma, new_start, length, false, true))
return -ENOMEM;
lru_add_drain();
@@ -986,8 +986,6 @@ static int exec_mmap(struct mm_struct *mm)
tsk = current;
old_mm = current->mm;
exec_mm_release(tsk, old_mm);
- if (old_mm)
- sync_mm_rss(old_mm);
ret = down_write_killable(&tsk->signal->exec_update_lock);
if (ret)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f16aa375c02b..a5d784872303 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1664,7 +1664,7 @@ struct ext4_sb_info {
__u32 s_csum_seed;
/* Reclaim extents from extent status tree */
- struct shrinker s_es_shrinker;
+ struct shrinker *s_es_shrinker;
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index f4b50652f0cc..4a00e2f019d9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1632,7 +1632,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
unsigned long nr;
struct ext4_sb_info *sbi;
- sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
+ sbi = shrink->private_data;
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
return nr;
@@ -1641,8 +1641,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
static unsigned long ext4_es_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct ext4_sb_info *sbi = container_of(shrink,
- struct ext4_sb_info, s_es_shrinker);
+ struct ext4_sb_info *sbi = shrink->private_data;
int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk;
@@ -1726,13 +1725,17 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
if (err)
goto err3;
- sbi->s_es_shrinker.scan_objects = ext4_es_scan;
- sbi->s_es_shrinker.count_objects = ext4_es_count;
- sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
- err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s",
- sbi->s_sb->s_id);
- if (err)
+ sbi->s_es_shrinker = shrinker_alloc(0, "ext4-es:%s", sbi->s_sb->s_id);
+ if (!sbi->s_es_shrinker) {
+ err = -ENOMEM;
goto err4;
+ }
+
+ sbi->s_es_shrinker->scan_objects = ext4_es_scan;
+ sbi->s_es_shrinker->count_objects = ext4_es_count;
+ sbi->s_es_shrinker->private_data = sbi;
+
+ shrinker_register(sbi->s_es_shrinker);
return 0;
err4:
@@ -1752,7 +1755,7 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
- unregister_shrinker(&sbi->s_es_shrinker);
+ shrinker_free(sbi->s_es_shrinker);
}
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a6838f54ae91..61277f7f8722 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1032,10 +1032,8 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
BUG_ON(from > to);
head = folio_buffers(folio);
- if (!head) {
- create_empty_buffers(&folio->page, blocksize, 0);
- head = folio_buffers(folio);
- }
+ if (!head)
+ head = create_empty_buffers(folio, blocksize, 0);
bbits = ilog2(blocksize);
block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
@@ -1165,7 +1163,7 @@ retry_grab:
* starting the handle.
*/
if (!folio_buffers(folio))
- create_empty_buffers(&folio->page, inode->i_sb->s_blocksize, 0);
+ create_empty_buffers(folio, inode->i_sb->s_blocksize, 0);
folio_unlock(folio);
@@ -3655,10 +3653,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
bh = folio_buffers(folio);
- if (!bh) {
- create_empty_buffers(&folio->page, blocksize, 0);
- bh = folio_buffers(folio);
- }
+ if (!bh)
+ bh = create_empty_buffers(folio, blocksize, 0);
/* Find the buffer that contains "offset" */
pos = blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 18a9e7c47975..3aa57376d9c2 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -183,10 +183,8 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
blocksize = i_blocksize(inode);
head = folio_buffers(folio);
- if (!head) {
- create_empty_buffers(&folio->page, blocksize, 0);
- head = folio_buffers(folio);
- }
+ if (!head)
+ head = create_empty_buffers(folio, blocksize, 0);
block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start;
@@ -380,9 +378,10 @@ data_copy:
}
/* Perform all necessary steps similar write_begin()/write_end()
* but keeping in mind that i_size will not change */
- if (!folio_buffers(folio[0]))
- create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
bh = folio_buffers(folio[0]);
+ if (!bh)
+ bh = create_empty_buffers(folio[0],
+ 1 << orig_inode->i_blkbits, 0);
for (i = 0; i < data_offset_in_page; i++)
bh = bh->b_this_page;
for (i = 0; i < block_len_in_page; i++) {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 3e7d160f543f..21e8f0aebb3c 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -70,15 +70,8 @@ static void __read_end_io(struct bio *bio)
{
struct folio_iter fi;
- bio_for_each_folio_all(fi, bio) {
- struct folio *folio = fi.folio;
-
- if (bio->bi_status)
- folio_clear_uptodate(folio);
- else
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- }
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, bio->bi_status == 0);
if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
bio_put(bio);
@@ -336,8 +329,7 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_need_verity(inode, folio->index) &&
!fsverity_verify_folio(folio))
goto set_error_page;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, true);
continue;
}
} else if (fully_mapped) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 77e2b694c7d5..54a9dde7483a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -244,18 +244,25 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
blk_opf_t op_flags)
{
- return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE;
+
+ return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
}
struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block)
{
- return __ext4_sb_bread_gfp(sb, block, 0, 0);
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
+ ~__GFP_FS);
+
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
- struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
+ struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
+ sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN);
if (likely(bh)) {
if (trylock_buffer(bh))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be17d77513d5..05f9f7b6ebf8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -83,11 +83,26 @@ void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
#endif
/* f2fs-wide shrinker description */
-static struct shrinker f2fs_shrinker_info = {
- .scan_objects = f2fs_shrink_scan,
- .count_objects = f2fs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *f2fs_shrinker_info;
+
+static int __init f2fs_init_shrinker(void)
+{
+ f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker");
+ if (!f2fs_shrinker_info)
+ return -ENOMEM;
+
+ f2fs_shrinker_info->count_objects = f2fs_shrink_count;
+ f2fs_shrinker_info->scan_objects = f2fs_shrink_scan;
+
+ shrinker_register(f2fs_shrinker_info);
+
+ return 0;
+}
+
+static void f2fs_exit_shrinker(void)
+{
+ shrinker_free(f2fs_shrinker_info);
+}
enum {
Opt_gc_background,
@@ -4940,7 +4955,7 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_sysfs();
if (err)
goto free_garbage_collection_cache;
- err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker");
+ err = f2fs_init_shrinker();
if (err)
goto free_sysfs;
err = register_filesystem(&f2fs_fs_type);
@@ -4985,7 +5000,7 @@ free_root_stats:
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
free_sysfs:
f2fs_exit_sysfs();
free_garbage_collection_cache:
@@ -5017,7 +5032,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
f2fs_exit_sysfs();
f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c26d48355cc2..6b060fc9e260 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -130,7 +130,7 @@ static int __gfs2_jdata_write_folio(struct folio *folio,
if (folio_test_checked(folio)) {
folio_clear_checked(folio);
if (!folio_buffers(folio)) {
- folio_create_empty_buffers(folio,
+ create_empty_buffers(folio,
inode->i_sb->s_blocksize,
BIT(BH_Dirty)|BIT(BH_Uptodate));
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 011cd992e0e6..6eb6f1bd9e34 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -43,53 +43,51 @@ struct metapath {
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
/**
- * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
+ * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
* @ip: the inode
* @dibh: the dinode buffer
* @block: the block number that was allocated
- * @page: The (optional) page. This is looked up if @page is NULL
+ * @folio: The folio.
*
* Returns: errno
*/
-
-static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
- u64 block, struct page *page)
+static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
+ u64 block, struct folio *folio)
{
struct inode *inode = &ip->i_inode;
- if (!PageUptodate(page)) {
- void *kaddr = kmap(page);
+ if (!folio_test_uptodate(folio)) {
+ void *kaddr = kmap_local_folio(folio, 0);
u64 dsize = i_size_read(inode);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
- memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
- kunmap(page);
+ memset(kaddr + dsize, 0, folio_size(folio) - dsize);
+ kunmap_local(kaddr);
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
if (gfs2_is_jdata(ip)) {
- struct buffer_head *bh;
+ struct buffer_head *bh = folio_buffers(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, BIT(inode->i_blkbits),
- BIT(BH_Uptodate));
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ BIT(inode->i_blkbits), BIT(BH_Uptodate));
- bh = page_buffers(page);
if (!buffer_mapped(bh))
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
} else {
- set_page_dirty(page);
+ folio_mark_dirty(folio);
gfs2_ordered_add_inode(ip);
}
return 0;
}
-static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
+static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{
struct buffer_head *bh, *dibh;
struct gfs2_dinode *di;
@@ -118,7 +116,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
dibh, sizeof(struct gfs2_dinode));
brelse(bh);
} else {
- error = gfs2_unstuffer_page(ip, dibh, block, page);
+ error = gfs2_unstuffer_folio(ip, dibh, block, folio);
if (error)
goto out_brelse;
}
@@ -157,17 +155,17 @@ out_brelse:
int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
struct inode *inode = &ip->i_inode;
- struct page *page;
+ struct folio *folio;
int error;
down_write(&ip->i_rw_mutex);
- page = grab_cache_page(inode->i_mapping, 0);
- error = -ENOMEM;
- if (!page)
+ folio = filemap_grab_folio(inode->i_mapping, 0);
+ error = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out;
- error = __gfs2_unstuff_inode(ip, page);
- unlock_page(page);
- put_page(page);
+ error = __gfs2_unstuff_inode(ip, folio);
+ folio_unlock(folio);
+ folio_put(folio);
out:
up_write(&ip->i_rw_mutex);
return error;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3772a5d9e85c..d5fa75eac0bf 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2041,11 +2041,7 @@ static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
return vfs_pressure_ratio(atomic_read(&lru_count));
}
-static struct shrinker glock_shrinker = {
- .seeks = DEFAULT_SEEKS,
- .count_objects = gfs2_glock_shrink_count,
- .scan_objects = gfs2_glock_shrink_scan,
-};
+static struct shrinker *glock_shrinker;
/**
* glock_hash_walk - Call a function for glock in a hash bucket
@@ -2465,13 +2461,18 @@ int __init gfs2_glock_init(void)
return -ENOMEM;
}
- ret = register_shrinker(&glock_shrinker, "gfs2-glock");
- if (ret) {
+ glock_shrinker = shrinker_alloc(0, "gfs2-glock");
+ if (!glock_shrinker) {
destroy_workqueue(glock_workqueue);
rhashtable_destroy(&gl_hash_table);
- return ret;
+ return -ENOMEM;
}
+ glock_shrinker->count_objects = gfs2_glock_shrink_count;
+ glock_shrinker->scan_objects = gfs2_glock_shrink_scan;
+
+ shrinker_register(glock_shrinker);
+
for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
init_waitqueue_head(glock_wait_table + i);
@@ -2480,7 +2481,7 @@ int __init gfs2_glock_init(void)
void gfs2_glock_exit(void)
{
- unregister_shrinker(&glock_shrinker);
+ shrinker_free(glock_shrinker);
rhashtable_destroy(&gl_hash_table);
destroy_workqueue(glock_workqueue);
}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 66eb98b690a2..79be0cdc730c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -147,7 +147,7 @@ static int __init init_gfs2_fs(void)
if (!gfs2_trans_cachep)
goto fail_cachep8;
- error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd");
+ error = gfs2_qd_shrinker_init();
if (error)
goto fail_shrinker;
@@ -196,7 +196,7 @@ fail_wq3:
fail_wq2:
destroy_workqueue(gfs2_recovery_wq);
fail_wq1:
- unregister_shrinker(&gfs2_qd_shrinker);
+ gfs2_qd_shrinker_exit();
fail_shrinker:
kmem_cache_destroy(gfs2_trans_cachep);
fail_cachep8:
@@ -229,7 +229,7 @@ fail_lru:
static void __exit exit_gfs2_fs(void)
{
- unregister_shrinker(&gfs2_qd_shrinker);
+ gfs2_qd_shrinker_exit();
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 924361fa510b..25ceb0805df2 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -115,7 +115,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
@@ -129,36 +129,31 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
- for (;;) {
- page = grab_cache_page(mapping, index);
- if (page)
- break;
- yield();
- }
- if (!page_has_buffers(page))
- create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping) | __GFP_NOFAIL);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ sdp->sd_sb.sb_bsize, 0);
} else {
- page = find_get_page_flags(mapping, index,
- FGP_LOCK|FGP_ACCESSED);
- if (!page)
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED, 0);
+ if (IS_ERR(folio))
return NULL;
- if (!page_has_buffers(page)) {
- bh = NULL;
- goto out_unlock;
- }
+ bh = folio_buffers(folio);
}
- /* Locate header for our buffer within our page */
- for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
- /* Do nothing */;
- get_bh(bh);
+ if (!bh)
+ goto out_unlock;
+ bh = get_nth_bh(bh, bufnum);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return bh;
}
@@ -405,26 +400,20 @@ static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
{
struct address_space *mapping = ip->i_inode.i_mapping;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
unsigned long index = blkno >> shift; /* convert block to page */
unsigned int bufnum = blkno - (index << shift);
- page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED);
- if (!page)
+ folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0);
+ if (IS_ERR(folio))
return NULL;
- if (!page_has_buffers(page)) {
- unlock_page(page);
- put_page(page);
- return NULL;
- }
- /* Locate header for our buffer within our page */
- for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
- /* Do nothing */;
- get_bh(bh);
- unlock_page(page);
- put_page(page);
+ bh = folio_buffers(folio);
+ if (bh)
+ bh = get_nth_bh(bh, bufnum);
+ folio_unlock(folio);
+ folio_put(folio);
return bh;
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index d9854aece15b..5cbbc1a46a92 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -196,13 +196,26 @@ static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc));
}
-struct shrinker gfs2_qd_shrinker = {
- .count_objects = gfs2_qd_shrink_count,
- .scan_objects = gfs2_qd_shrink_scan,
- .seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_NUMA_AWARE,
-};
+static struct shrinker *gfs2_qd_shrinker;
+
+int __init gfs2_qd_shrinker_init(void)
+{
+ gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "gfs2-qd");
+ if (!gfs2_qd_shrinker)
+ return -ENOMEM;
+
+ gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count;
+ gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan;
+
+ shrinker_register(gfs2_qd_shrinker);
+ return 0;
+}
+
+void gfs2_qd_shrinker_exit(void)
+{
+ shrinker_free(gfs2_qd_shrinker);
+}
static u64 qd2index(struct gfs2_quota_data *qd)
{
@@ -736,7 +749,7 @@ static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct inode *inode = &ip->i_inode;
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
u64 blk;
unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
@@ -745,15 +758,15 @@ static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
boff = off % bsize;
- page = grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
- if (!page_has_buffers(page))
- create_empty_buffers(page, bsize, 0);
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio, bsize, 0);
- bh = page_buffers(page);
- for(;;) {
- /* Find the beginning block within the page */
+ for (;;) {
+ /* Find the beginning block within the folio */
if (pg_off >= ((bnum * bsize) + bsize)) {
bh = bh->b_this_page;
bnum++;
@@ -766,9 +779,10 @@ static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
goto unlock_out;
/* If it's a newly allocated disk block, zero it */
if (buffer_new(bh))
- zero_user(page, bnum * bsize, bh->b_size);
+ folio_zero_range(folio, bnum * bsize,
+ bh->b_size);
}
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
goto unlock_out;
@@ -784,17 +798,17 @@ static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
break;
}
- /* Write to the page, now that we have setup the buffer(s) */
- memcpy_to_page(page, off, buf, bytes);
- flush_dcache_page(page);
- unlock_page(page);
- put_page(page);
+ /* Write to the folio, now that we have setup the buffer(s) */
+ memcpy_to_folio(folio, off, buf, bytes);
+ flush_dcache_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return 0;
unlock_out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return -EIO;
}
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 1429945215a0..36f54b426b0c 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -60,7 +60,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
}
extern const struct quotactl_ops gfs2_quotactl_ops;
-extern struct shrinker gfs2_qd_shrinker;
+int __init gfs2_qd_shrinker_init(void);
+void gfs2_qd_shrinker_exit(void);
extern struct list_lru gfs2_qd_lru;
extern void __init gfs2_quota_hash_init(void);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index da217eaba102..54b3d489b6a7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -83,29 +83,6 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
{}
};
-#ifdef CONFIG_NUMA
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
- vma->vm_policy = mpol_shared_policy_lookup(&HUGETLBFS_I(inode)->policy,
- index);
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
- mpol_cond_put(vma->vm_policy);
-}
-#else
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
-}
-#endif
-
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
@@ -135,7 +112,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
vma->vm_ops = &hugetlb_vm_ops;
- ret = seal_check_future_write(info->seals, vma);
+ ret = seal_check_write(info->seals, vma);
if (ret)
return ret;
@@ -295,7 +272,7 @@ static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t byt
size_t res = 0;
/* First subpage to start the loop. */
- page += offset / PAGE_SIZE;
+ page = nth_page(page, offset / PAGE_SIZE);
offset %= PAGE_SIZE;
while (1) {
if (is_raw_hwpoison_page_in_hugepage(page))
@@ -309,7 +286,7 @@ static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t byt
break;
offset += n;
if (offset == PAGE_SIZE) {
- page++;
+ page = nth_page(page, 1);
offset = 0;
}
}
@@ -334,7 +311,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
ssize_t retval = 0;
while (iov_iter_count(to)) {
- struct page *page;
+ struct folio *folio;
size_t nr, copied, want;
/* nr is the maximum number of bytes to copy from this page */
@@ -352,18 +329,18 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
nr = nr - offset;
- /* Find the page */
- page = find_lock_page(mapping, index);
- if (unlikely(page == NULL)) {
+ /* Find the folio */
+ folio = filemap_lock_hugetlb_folio(h, mapping, index);
+ if (IS_ERR(folio)) {
/*
* We have a HOLE, zero out the user-buffer for the
* length of the hole or request.
*/
copied = iov_iter_zero(nr, to);
} else {
- unlock_page(page);
+ folio_unlock(folio);
- if (!PageHWPoison(page))
+ if (!folio_test_has_hwpoisoned(folio))
want = nr;
else {
/*
@@ -371,19 +348,19 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
* touching the 1st raw HWPOISON subpage after
* offset.
*/
- want = adjust_range_hwpoison(page, offset, nr);
+ want = adjust_range_hwpoison(&folio->page, offset, nr);
if (want == 0) {
- put_page(page);
+ folio_put(folio);
retval = -EIO;
break;
}
}
/*
- * We have the page, copy it to user space buffer.
+ * We have the folio, copy it to user space buffer.
*/
- copied = copy_page_to_iter(page, offset, want, to);
- put_page(page);
+ copied = copy_folio_to_iter(folio, offset, want, to);
+ folio_put(folio);
}
offset += copied;
retval += copied;
@@ -661,21 +638,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
{
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
- const pgoff_t start = lstart >> huge_page_shift(h);
- const pgoff_t end = lend >> huge_page_shift(h);
+ const pgoff_t end = lend >> PAGE_SHIFT;
struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
folio_batch_init(&fbatch);
- next = start;
+ next = lstart >> PAGE_SHIFT;
while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index;
+ index = folio->index >> huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -693,7 +669,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
if (truncate_op)
- (void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
+ (void)hugetlb_unreserve_pages(inode,
+ lstart >> huge_page_shift(h),
+ LONG_MAX, freed);
}
static void hugetlbfs_evict_inode(struct inode *inode)
@@ -741,7 +719,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
pgoff_t idx = start >> huge_page_shift(h);
struct folio *folio;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio))
return;
@@ -852,8 +830,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/*
* Initialize a pseudo vma as this is required by the huge page
- * allocation routines. If NUMA is configured, use page index
- * as input to create an allocation policy.
+ * allocation routines.
*/
vma_init(&pseudo_vma, mm);
vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
@@ -886,7 +863,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- folio = filemap_get_folio(mapping, index);
+ folio = filemap_get_folio(mapping, index << huge_page_order(h));
if (!IS_ERR(folio)) {
folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -901,9 +878,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
- hugetlb_set_vma_policy(&pseudo_vma, inode, index);
folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
- hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
error = PTR_ERR(folio);
@@ -1282,18 +1257,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
hugetlbfs_inc_free_inodes(sbinfo);
return NULL;
}
-
- /*
- * Any time after allocation, hugetlbfs_destroy_inode can be called
- * for the inode. mpol_free_shared_policy is unconditionally called
- * as part of hugetlbfs_destroy_inode. So, initialize policy here
- * in case of a quick call to destroy.
- *
- * Note that the policy is initialized even if we are creating a
- * private inode. This simplifies hugetlbfs_destroy_inode.
- */
- mpol_shared_policy_init(&p->policy, NULL);
-
return &p->vfs_inode;
}
@@ -1305,7 +1268,6 @@ static void hugetlbfs_free_inode(struct inode *inode)
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
- mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
}
static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 2bc0aa23fde3..f72df2babe56 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -29,9 +29,9 @@ typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
* and I/O completions.
*/
struct iomap_folio_state {
- atomic_t read_bytes_pending;
- atomic_t write_bytes_pending;
spinlock_t state_lock;
+ unsigned int read_bytes_pending;
+ atomic_t write_bytes_pending;
/*
* Each block has two bits in this bitmap:
@@ -57,30 +57,32 @@ static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
return test_bit(block, ifs->state);
}
-static void ifs_set_range_uptodate(struct folio *folio,
+static bool ifs_set_range_uptodate(struct folio *folio,
struct iomap_folio_state *ifs, size_t off, size_t len)
{
struct inode *inode = folio->mapping->host;
unsigned int first_blk = off >> inode->i_blkbits;
unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
unsigned int nr_blks = last_blk - first_blk + 1;
- unsigned long flags;
- spin_lock_irqsave(&ifs->state_lock, flags);
bitmap_set(ifs->state, first_blk, nr_blks);
- if (ifs_is_fully_uptodate(folio, ifs))
- folio_mark_uptodate(folio);
- spin_unlock_irqrestore(&ifs->state_lock, flags);
+ return ifs_is_fully_uptodate(folio, ifs);
}
static void iomap_set_range_uptodate(struct folio *folio, size_t off,
size_t len)
{
struct iomap_folio_state *ifs = folio->private;
+ unsigned long flags;
+ bool uptodate = true;
- if (ifs)
- ifs_set_range_uptodate(folio, ifs, off, len);
- else
+ if (ifs) {
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+ }
+
+ if (uptodate)
folio_mark_uptodate(folio);
}
@@ -181,7 +183,7 @@ static void ifs_free(struct folio *folio)
if (!ifs)
return;
- WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+ WARN_ON_ONCE(ifs->read_bytes_pending != 0);
WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
folio_test_uptodate(folio));
@@ -248,20 +250,28 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
*lenp = plen;
}
-static void iomap_finish_folio_read(struct folio *folio, size_t offset,
+static void iomap_finish_folio_read(struct folio *folio, size_t off,
size_t len, int error)
{
struct iomap_folio_state *ifs = folio->private;
+ bool uptodate = !error;
+ bool finished = true;
- if (unlikely(error)) {
- folio_clear_uptodate(folio);
- folio_set_error(folio);
- } else {
- iomap_set_range_uptodate(folio, offset, len);
+ if (ifs) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ if (!error)
+ uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+ ifs->read_bytes_pending -= len;
+ finished = !ifs->read_bytes_pending;
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
}
- if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
- folio_unlock(folio);
+ if (error)
+ folio_set_error(folio);
+ if (finished)
+ folio_end_read(folio, uptodate);
}
static void iomap_read_end_io(struct bio *bio)
@@ -358,8 +368,11 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
}
ctx->cur_folio_in_bio = true;
- if (ifs)
- atomic_add(plen, &ifs->read_bytes_pending);
+ if (ifs) {
+ spin_lock_irq(&ifs->state_lock);
+ ifs->read_bytes_pending += plen;
+ spin_unlock_irq(&ifs->state_lock);
+ }
sector = iomap_sector(iomap, pos);
if (!ctx->bio ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 30dec2bd2ecc..ed53188472f9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1290,7 +1290,7 @@ static int jbd2_min_tag_size(void)
static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+ journal_t *journal = shrink->private_data;
unsigned long nr_to_scan = sc->nr_to_scan;
unsigned long nr_shrunk;
unsigned long count;
@@ -1316,7 +1316,7 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+ journal_t *journal = shrink->private_data;
unsigned long count;
count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
@@ -1588,14 +1588,21 @@ static journal_t *journal_init_common(struct block_device *bdev,
goto err_cleanup;
journal->j_shrink_transaction = NULL;
- journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
- journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
- journal->j_shrinker.seeks = DEFAULT_SEEKS;
- journal->j_shrinker.batch = journal->j_max_transaction_buffers;
- err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
- MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
- if (err)
+
+ journal->j_shrinker = shrinker_alloc(0, "jbd2-journal:(%u:%u)",
+ MAJOR(bdev->bd_dev),
+ MINOR(bdev->bd_dev));
+ if (!journal->j_shrinker) {
+ err = -ENOMEM;
goto err_cleanup;
+ }
+
+ journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
+ journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
+ journal->j_shrinker->batch = journal->j_max_transaction_buffers;
+ journal->j_shrinker->private_data = journal;
+
+ shrinker_register(journal->j_shrinker);
return journal;
@@ -2172,9 +2179,9 @@ int jbd2_journal_destroy(journal_t *journal)
brelse(journal->j_sb_buffer);
}
- if (journal->j_shrinker.flags & SHRINKER_REGISTERED) {
+ if (journal->j_shrinker) {
percpu_counter_destroy(&journal->j_checkpoint_jh_count);
- unregister_shrinker(&journal->j_shrinker);
+ shrinker_free(journal->j_shrinker);
}
if (journal->j_proc_entry)
jbd2_stats_proc_exit(journal);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 180906c36f51..aaa76410e550 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -429,60 +429,11 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
-#ifdef CONFIG_NUMA
-static int kernfs_vma_set_policy(struct vm_area_struct *vma,
- struct mempolicy *new)
-{
- struct file *file = vma->vm_file;
- struct kernfs_open_file *of = kernfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!kernfs_get_active(of->kn))
- return -EINVAL;
-
- ret = 0;
- if (of->vm_ops->set_policy)
- ret = of->vm_ops->set_policy(vma, new);
-
- kernfs_put_active(of->kn);
- return ret;
-}
-
-static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
- unsigned long addr)
-{
- struct file *file = vma->vm_file;
- struct kernfs_open_file *of = kernfs_of(file);
- struct mempolicy *pol;
-
- if (!of->vm_ops)
- return vma->vm_policy;
-
- if (!kernfs_get_active(of->kn))
- return vma->vm_policy;
-
- pol = vma->vm_policy;
- if (of->vm_ops->get_policy)
- pol = of->vm_ops->get_policy(vma, addr);
-
- kernfs_put_active(of->kn);
- return pol;
-}
-
-#endif
-
static const struct vm_operations_struct kernfs_vm_ops = {
.open = kernfs_vma_open,
.fault = kernfs_vma_fault,
.page_mkwrite = kernfs_vma_page_mkwrite,
.access = kernfs_vma_access,
-#ifdef CONFIG_NUMA
- .set_policy = kernfs_vma_set_policy,
- .get_policy = kernfs_vma_get_policy,
-#endif
};
static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index c4bf26142eec..79b96e74a8a0 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -265,7 +265,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
sb->s_time_gran = 1;
/* sysfs dentries and inodes don't require IO to create */
- sb->s_shrink.seeks = 0;
+ sb->s_shrink->seeks = 0;
/* get root inode, initialize and unlock it */
down_read(&kf_root->kernfs_rwsem);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2a4b8b549e93..82aa7a35db26 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -37,7 +37,7 @@ struct mb_cache {
struct list_head c_list;
/* Number of entries in cache */
unsigned long c_entry_count;
- struct shrinker c_shrink;
+ struct shrinker *c_shrink;
/* Work for shrinking when the cache has too many entries */
struct work_struct c_shrink_work;
};
@@ -293,8 +293,7 @@ EXPORT_SYMBOL(mb_cache_entry_touch);
static unsigned long mb_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct mb_cache *cache = container_of(shrink, struct mb_cache,
- c_shrink);
+ struct mb_cache *cache = shrink->private_data;
return cache->c_entry_count;
}
@@ -333,8 +332,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
static unsigned long mb_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct mb_cache *cache = container_of(shrink, struct mb_cache,
- c_shrink);
+ struct mb_cache *cache = shrink->private_data;
return mb_cache_shrink(cache, sc->nr_to_scan);
}
@@ -377,15 +375,19 @@ struct mb_cache *mb_cache_create(int bucket_bits)
for (i = 0; i < bucket_count; i++)
INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
- cache->c_shrink.count_objects = mb_cache_count;
- cache->c_shrink.scan_objects = mb_cache_scan;
- cache->c_shrink.seeks = DEFAULT_SEEKS;
- if (register_shrinker(&cache->c_shrink, "mbcache-shrinker")) {
+ cache->c_shrink = shrinker_alloc(0, "mbcache-shrinker");
+ if (!cache->c_shrink) {
kfree(cache->c_hash);
kfree(cache);
goto err_out;
}
+ cache->c_shrink->count_objects = mb_cache_count;
+ cache->c_shrink->scan_objects = mb_cache_scan;
+ cache->c_shrink->private_data = cache;
+
+ shrinker_register(cache->c_shrink);
+
INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
return cache;
@@ -406,7 +408,7 @@ void mb_cache_destroy(struct mb_cache *cache)
{
struct mb_cache_entry *entry, *next;
- unregister_shrinker(&cache->c_shrink);
+ shrinker_free(cache->c_shrink);
/*
* We don't bother with any locking. Cache must not be used at this
diff --git a/fs/mpage.c b/fs/mpage.c
index 242e213ee064..ffb064ed9d04 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -119,8 +119,7 @@ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
folio_mark_uptodate(folio);
return;
}
- create_empty_buffers(&folio->page, i_blocksize(inode), 0);
- head = folio_buffers(folio);
+ head = create_empty_buffers(folio, i_blocksize(inode), 0);
}
page_bh = head;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 911f634ba3da..2ad66a8922f4 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -796,28 +796,9 @@ static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfs4_xattr_cache_shrinker = {
- .count_objects = nfs4_xattr_cache_count,
- .scan_objects = nfs4_xattr_cache_scan,
- .seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_MEMCG_AWARE,
-};
-
-static struct shrinker nfs4_xattr_entry_shrinker = {
- .count_objects = nfs4_xattr_entry_count,
- .scan_objects = nfs4_xattr_entry_scan,
- .seeks = DEFAULT_SEEKS,
- .batch = 512,
- .flags = SHRINKER_MEMCG_AWARE,
-};
-
-static struct shrinker nfs4_xattr_large_entry_shrinker = {
- .count_objects = nfs4_xattr_entry_count,
- .scan_objects = nfs4_xattr_entry_scan,
- .seeks = 1,
- .batch = 512,
- .flags = SHRINKER_MEMCG_AWARE,
-};
+static struct shrinker *nfs4_xattr_cache_shrinker;
+static struct shrinker *nfs4_xattr_entry_shrinker;
+static struct shrinker *nfs4_xattr_large_entry_shrinker;
static enum lru_status
cache_lru_isolate(struct list_head *item,
@@ -943,7 +924,7 @@ nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
struct nfs4_xattr_entry *entry;
struct list_lru *lru;
- lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
@@ -971,7 +952,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
unsigned long count;
struct list_lru *lru;
- lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
count = list_lru_shrink_count(lru, sc);
@@ -991,18 +972,34 @@ static void nfs4_xattr_cache_init_once(void *p)
INIT_LIST_HEAD(&cache->dispose);
}
-static int nfs4_xattr_shrinker_init(struct shrinker *shrinker,
- struct list_lru *lru, const char *name)
+typedef unsigned long (*count_objects_cb)(struct shrinker *s,
+ struct shrink_control *sc);
+typedef unsigned long (*scan_objects_cb)(struct shrinker *s,
+ struct shrink_control *sc);
+
+static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker,
+ struct list_lru *lru, const char *name,
+ count_objects_cb count,
+ scan_objects_cb scan, long batch, int seeks)
{
- int ret = 0;
+ int ret;
- ret = register_shrinker(shrinker, name);
- if (ret)
+ *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name);
+ if (!*shrinker)
+ return -ENOMEM;
+
+ ret = list_lru_init_memcg(lru, *shrinker);
+ if (ret) {
+ shrinker_free(*shrinker);
return ret;
+ }
- ret = list_lru_init_memcg(lru, shrinker);
- if (ret)
- unregister_shrinker(shrinker);
+ (*shrinker)->count_objects = count;
+ (*shrinker)->scan_objects = scan;
+ (*shrinker)->batch = batch;
+ (*shrinker)->seeks = seeks;
+
+ shrinker_register(*shrinker);
return ret;
}
@@ -1010,7 +1007,7 @@ static int nfs4_xattr_shrinker_init(struct shrinker *shrinker,
static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
struct list_lru *lru)
{
- unregister_shrinker(shrinker);
+ shrinker_free(shrinker);
list_lru_destroy(lru);
}
@@ -1026,27 +1023,31 @@ int __init nfs4_xattr_cache_init(void)
return -ENOMEM;
ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
- &nfs4_xattr_cache_lru,
- "nfs-xattr_cache");
+ &nfs4_xattr_cache_lru, "nfs-xattr_cache",
+ nfs4_xattr_cache_count,
+ nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS);
if (ret)
goto out1;
ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
- &nfs4_xattr_entry_lru,
- "nfs-xattr_entry");
+ &nfs4_xattr_entry_lru, "nfs-xattr_entry",
+ nfs4_xattr_entry_count,
+ nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS);
if (ret)
goto out2;
ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
&nfs4_xattr_large_entry_lru,
- "nfs-xattr_large_entry");
+ "nfs-xattr_large_entry",
+ nfs4_xattr_entry_count,
+ nfs4_xattr_entry_scan, 512, 1);
if (!ret)
return 0;
- nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
&nfs4_xattr_entry_lru);
out2:
- nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
&nfs4_xattr_cache_lru);
out1:
kmem_cache_destroy(nfs4_xattr_cache_cachep);
@@ -1056,11 +1057,11 @@ out1:
void nfs4_xattr_cache_exit(void)
{
- nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker,
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker,
&nfs4_xattr_large_entry_lru);
- nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
&nfs4_xattr_entry_lru);
- nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
&nfs4_xattr_cache_lru);
kmem_cache_destroy(nfs4_xattr_cache_cachep);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9b1cfca8112a..2667ab753d42 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -129,11 +129,7 @@ static void nfs_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static struct shrinker acl_shrinker = {
- .count_objects = nfs_access_cache_count,
- .scan_objects = nfs_access_cache_scan,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *acl_shrinker;
/*
* Register the NFS filesystems
@@ -153,9 +149,18 @@ int __init register_nfs_fs(void)
ret = nfs_register_sysctl();
if (ret < 0)
goto error_2;
- ret = register_shrinker(&acl_shrinker, "nfs-acl");
- if (ret < 0)
+
+ acl_shrinker = shrinker_alloc(0, "nfs-acl");
+ if (!acl_shrinker) {
+ ret = -ENOMEM;
goto error_3;
+ }
+
+ acl_shrinker->count_objects = nfs_access_cache_count;
+ acl_shrinker->scan_objects = nfs_access_cache_scan;
+
+ shrinker_register(acl_shrinker);
+
#ifdef CONFIG_NFS_V4_2
nfs_ssc_register_ops();
#endif
@@ -175,7 +180,7 @@ error_0:
*/
void __exit unregister_nfs_fs(void)
{
- unregister_shrinker(&acl_shrinker);
+ shrinker_free(acl_shrinker);
nfs_unregister_sysctl();
unregister_nfs4_fs();
#ifdef CONFIG_NFS_V4_2
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 07bf219f9ae4..ef063f93fde9 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -521,11 +521,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
return ret;
}
-static struct shrinker nfsd_file_shrinker = {
- .scan_objects = nfsd_file_lru_scan,
- .count_objects = nfsd_file_lru_count,
- .seeks = 1,
-};
+static struct shrinker *nfsd_file_shrinker;
/**
* nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
@@ -746,12 +742,19 @@ nfsd_file_cache_init(void)
goto out_err;
}
- ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache");
- if (ret) {
- pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+ nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache");
+ if (!nfsd_file_shrinker) {
+ ret = -ENOMEM;
+ pr_err("nfsd: failed to allocate nfsd_file_shrinker\n");
goto out_lru;
}
+ nfsd_file_shrinker->count_objects = nfsd_file_lru_count;
+ nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan;
+ nfsd_file_shrinker->seeks = 1;
+
+ shrinker_register(nfsd_file_shrinker);
+
ret = lease_register_notifier(&nfsd_file_lease_notifier);
if (ret) {
pr_err("nfsd: unable to register lease notifier: %d\n", ret);
@@ -774,7 +777,7 @@ out:
out_notifier:
lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
- unregister_shrinker(&nfsd_file_shrinker);
+ shrinker_free(nfsd_file_shrinker);
out_lru:
list_lru_destroy(&nfsd_file_lru);
out_err:
@@ -891,7 +894,7 @@ nfsd_file_cache_shutdown(void)
return;
lease_unregister_notifier(&nfsd_file_lease_notifier);
- unregister_shrinker(&nfsd_file_shrinker);
+ shrinker_free(nfsd_file_shrinker);
/*
* make sure all callers of nfsd_file_lru_cb are done before
* calling nfsd_file_cache_purge
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ec49b200b797..ab303a8b77d5 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -177,7 +177,7 @@ struct nfsd_net {
/* size of cache when we saw the longest hash chain */
unsigned int longest_chain_cachesize;
- struct shrinker nfsd_reply_cache_shrinker;
+ struct shrinker *nfsd_reply_cache_shrinker;
/* tracking server-to-server copy mounts */
spinlock_t nfsd_ssc_lock;
@@ -195,7 +195,7 @@ struct nfsd_net {
int nfs4_max_clients;
atomic_t nfsd_courtesy_clients;
- struct shrinker nfsd_client_shrinker;
+ struct shrinker *nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work;
};
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 65fd5510323a..4045c852a450 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4452,8 +4452,7 @@ static unsigned long
nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
int count;
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_client_shrinker);
+ struct nfsd_net *nn = shrink->private_data;
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
@@ -8235,12 +8234,16 @@ static int nfs4_state_create_net(struct net *net)
INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
- nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-
- if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
+ nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client");
+ if (!nn->nfsd_client_shrinker)
goto err_shrinker;
+
+ nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker->private_data = nn;
+
+ shrinker_register(nn->nfsd_client_shrinker);
+
return 0;
err_shrinker:
@@ -8338,7 +8341,7 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- unregister_shrinker(&nn->nfsd_client_shrinker);
+ shrinker_free(nn->nfsd_client_shrinker);
cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 80621a709510..fd56a52aa5fb 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -201,26 +201,29 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
- int status = 0;
nn->max_drc_entries = nfsd_cache_size_limit();
atomic_set(&nn->num_drc_entries, 0);
hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize);
- nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
- nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
- nn->nfsd_reply_cache_shrinker.seeks = 1;
- status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
- "nfsd-reply:%s", nn->nfsd_name);
- if (status)
- return status;
-
nn->drc_hashtbl = kvzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
if (!nn->drc_hashtbl)
+ return -ENOMEM;
+
+ nn->nfsd_reply_cache_shrinker = shrinker_alloc(0, "nfsd-reply:%s",
+ nn->nfsd_name);
+ if (!nn->nfsd_reply_cache_shrinker)
goto out_shrinker;
+ nn->nfsd_reply_cache_shrinker->scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker->count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker->seeks = 1;
+ nn->nfsd_reply_cache_shrinker->private_data = nn;
+
+ shrinker_register(nn->nfsd_reply_cache_shrinker);
+
for (i = 0; i < hashsize; i++) {
INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
@@ -229,7 +232,7 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
return 0;
out_shrinker:
- unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
+ kvfree(nn->drc_hashtbl);
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
return -ENOMEM;
}
@@ -239,7 +242,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
struct nfsd_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
+ shrinker_free(nn->nfsd_reply_cache_shrinker);
for (i = 0; i < nn->drc_hashsize; i++) {
struct list_head *head = &nn->drc_hashtbl[i].lru_head;
@@ -323,8 +326,7 @@ nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
+ struct nfsd_net *nn = shrink->private_data;
return atomic_read(&nn->num_drc_entries);
}
@@ -343,8 +345,7 @@ nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
+ struct nfsd_net *nn = shrink->private_data;
unsigned long freed = 0;
LIST_HEAD(dispose);
unsigned int i;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 19c8158605ed..c97c77a39668 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -356,30 +356,28 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
*/
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
- pgoff_t index = (pgoff_t)block >>
- (PAGE_SHIFT - inode->i_blkbits);
- struct page *page;
- unsigned long first_block;
+ pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits);
+ struct folio *folio;
+ struct buffer_head *bh;
int ret = 0;
int still_dirty;
- page = find_lock_page(inode->i_mapping, index);
- if (!page)
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (IS_ERR(folio))
return -ENOENT;
- wait_on_page_writeback(page);
-
- first_block = (unsigned long)index <<
- (PAGE_SHIFT - inode->i_blkbits);
- if (page_has_buffers(page)) {
- struct buffer_head *bh;
+ folio_wait_writeback(folio);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ bh = folio_buffers(folio);
+ if (bh) {
+ unsigned long first_block = index <<
+ (PAGE_SHIFT - inode->i_blkbits);
+ bh = get_nth_bh(bh, block - first_block);
nilfs_forget_buffer(bh);
}
- still_dirty = PageDirty(page);
- unlock_page(page);
- put_page(page);
+ still_dirty = folio_test_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
if (still_dirty ||
invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
@@ -560,17 +558,19 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen;
- struct page *page;
+ struct folio *folio;
int blkbits = inode->i_blkbits;
- page = grab_cache_page(shadow->inode->i_mapping, bh->b_folio->index);
- if (!page)
- return -ENOMEM;
+ folio = filemap_grab_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, 0);
+ bh_frozen = folio_buffers(folio);
+ if (!bh_frozen)
+ bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0);
- bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
+ bh_frozen = get_nth_bh(bh_frozen, bh_offset(bh) >> blkbits);
if (!buffer_uptodate(bh_frozen))
nilfs_copy_buffer(bh_frozen, bh);
@@ -582,8 +582,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
brelse(bh_frozen); /* already frozen */
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return 0;
}
@@ -592,17 +592,19 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen = NULL;
- struct page *page;
+ struct folio *folio;
int n;
- page = find_lock_page(shadow->inode->i_mapping, bh->b_folio->index);
- if (page) {
- if (page_has_buffers(page)) {
+ folio = filemap_lock_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (!IS_ERR(folio)) {
+ bh_frozen = folio_buffers(folio);
+ if (bh_frozen) {
n = bh_offset(bh) >> inode->i_blkbits;
- bh_frozen = nilfs_page_get_nth_block(page, n);
+ bh_frozen = get_nth_bh(bh_frozen, n);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
return bh_frozen;
}
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index b4e54d079b7d..06b04758f289 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -25,19 +25,19 @@
(BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \
BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))
-static struct buffer_head *
-__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
- int blkbits, unsigned long b_state)
+static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
+ unsigned long block, pgoff_t index, int blkbits,
+ unsigned long b_state)
{
unsigned long first_block;
- struct buffer_head *bh;
+ struct buffer_head *bh = folio_buffers(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, b_state);
+ if (!bh)
+ bh = create_empty_buffers(folio, 1 << blkbits, b_state);
first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ bh = get_nth_bh(bh, block - first_block);
touch_buffer(bh);
wait_on_buffer(bh);
@@ -51,17 +51,17 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
{
int blkbits = inode->i_blkbits;
pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
- page = grab_cache_page(mapping, index);
- if (unlikely(!page))
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
return NULL;
- bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
+ bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state);
if (unlikely(!bh)) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return NULL;
}
return bh;
@@ -184,30 +184,32 @@ void nilfs_page_bug(struct page *page)
}
/**
- * nilfs_copy_page -- copy the page with buffers
- * @dst: destination page
- * @src: source page
- * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
+ * nilfs_copy_folio -- copy the folio with buffers
+ * @dst: destination folio
+ * @src: source folio
+ * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads.
*
- * This function is for both data pages and btnode pages. The dirty flag
- * should be treated by caller. The page must not be under i/o.
- * Both src and dst page must be locked
+ * This function is for both data folios and btnode folios. The dirty flag
+ * should be treated by caller. The folio must not be under i/o.
+ * Both src and dst folio must be locked
*/
-static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
+static void nilfs_copy_folio(struct folio *dst, struct folio *src,
+ bool copy_dirty)
{
struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
- BUG_ON(PageWriteback(dst));
+ BUG_ON(folio_test_writeback(dst));
- sbh = page_buffers(src);
- if (!page_has_buffers(dst))
- create_empty_buffers(dst, sbh->b_size, 0);
+ sbh = folio_buffers(src);
+ dbh = folio_buffers(dst);
+ if (!dbh)
+ dbh = create_empty_buffers(dst, sbh->b_size, 0);
if (copy_dirty)
mask |= BIT(BH_Dirty);
- dbh = dbufs = page_buffers(dst);
+ dbufs = dbh;
do {
lock_buffer(sbh);
lock_buffer(dbh);
@@ -218,16 +220,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
dbh = dbh->b_this_page;
} while (dbh != dbufs);
- copy_highpage(dst, src);
+ folio_copy(dst, src);
- if (PageUptodate(src) && !PageUptodate(dst))
- SetPageUptodate(dst);
- else if (!PageUptodate(src) && PageUptodate(dst))
- ClearPageUptodate(dst);
- if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
- SetPageMappedToDisk(dst);
- else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
- ClearPageMappedToDisk(dst);
+ if (folio_test_uptodate(src) && !folio_test_uptodate(dst))
+ folio_mark_uptodate(dst);
+ else if (!folio_test_uptodate(src) && folio_test_uptodate(dst))
+ folio_clear_uptodate(dst);
+ if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst))
+ folio_set_mappedtodisk(dst);
+ else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst))
+ folio_clear_mappedtodisk(dst);
do {
unlock_buffer(sbh);
@@ -269,7 +271,7 @@ repeat:
NILFS_PAGE_BUG(&folio->page,
"found empty page in dat page cache");
- nilfs_copy_page(&dfolio->page, &folio->page, 1);
+ nilfs_copy_folio(dfolio, folio, true);
filemap_dirty_folio(folio_mapping(dfolio), dfolio);
folio_unlock(dfolio);
@@ -314,7 +316,7 @@ repeat:
if (!IS_ERR(dfolio)) {
/* overwrite existing folio in the destination cache */
WARN_ON(folio_test_dirty(dfolio));
- nilfs_copy_page(&dfolio->page, &folio->page, 0);
+ nilfs_copy_folio(dfolio, folio, false);
folio_unlock(dfolio);
folio_put(dfolio);
/* Do we not need to remove folio from smap here? */
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 21ddcdd4d63e..d249ea1cefff 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -52,15 +52,4 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
#define NILFS_PAGE_BUG(page, m, a...) \
do { nilfs_page_bug(page); BUG(); } while (0)
-static inline struct buffer_head *
-nilfs_page_get_nth_block(struct page *page, unsigned int count)
-{
- struct buffer_head *bh = page_buffers(page);
-
- while (count-- > 0)
- bh = bh->b_this_page;
- get_bh(bh);
- return bh;
-}
-
#endif /* _NILFS_PAGE_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 7ec16879756e..55e31cc903d1 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -731,10 +731,9 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
continue;
}
head = folio_buffers(folio);
- if (!head) {
- create_empty_buffers(&folio->page, i_blocksize(inode), 0);
- head = folio_buffers(folio);
- }
+ if (!head)
+ head = create_empty_buffers(folio,
+ i_blocksize(inode), 0);
folio_unlock(folio);
bh = head;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 4e158bce4192..71e31e789b29 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -145,13 +145,12 @@ still_busy:
}
/**
- * ntfs_read_block - fill a @page of an address space with data
- * @page: page cache page to fill with data
+ * ntfs_read_block - fill a @folio of an address space with data
+ * @folio: page cache folio to fill with data
*
- * Fill the page @page of the address space belonging to the @page->host inode.
* We read each buffer asynchronously and when all buffers are read in, our io
* completion handler ntfs_end_buffer_read_async(), if required, automatically
- * applies the mst fixups to the page before finally marking it uptodate and
+ * applies the mst fixups to the folio before finally marking it uptodate and
* unlocking it.
*
* We only enforce allocated_size limit because i_size is checked for in
@@ -161,7 +160,7 @@ still_busy:
*
* Contains an adapted version of fs/buffer.c::block_read_full_folio().
*/
-static int ntfs_read_block(struct page *page)
+static int ntfs_read_block(struct folio *folio)
{
loff_t i_size;
VCN vcn;
@@ -178,7 +177,7 @@ static int ntfs_read_block(struct page *page)
int i, nr;
unsigned char blocksize_bits;
- vi = page->mapping->host;
+ vi = folio->mapping->host;
ni = NTFS_I(vi);
vol = ni->vol;
@@ -188,15 +187,10 @@ static int ntfs_read_block(struct page *page)
blocksize = vol->sb->s_blocksize;
blocksize_bits = vol->sb->s_blocksize_bits;
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, blocksize, 0);
- if (unlikely(!page_has_buffers(page))) {
- unlock_page(page);
- return -ENOMEM;
- }
- }
- bh = head = page_buffers(page);
- BUG_ON(!bh);
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio, blocksize, 0);
+ bh = head;
/*
* We may be racing with truncate. To avoid some of the problems we
@@ -205,11 +199,11 @@ static int ntfs_read_block(struct page *page)
* may leave some buffers unmapped which are now allocated. This is
* not a problem since these buffers will just get mapped when a write
* occurs. In case of a shrinking truncate, we will detect this later
- * on due to the runlist being incomplete and if the page is being
+ * on due to the runlist being incomplete and if the folio is being
* fully truncated, truncate will throw it away as soon as we unlock
* it so no need to worry what we do with it.
*/
- iblock = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
+ iblock = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
init_size = ni->initialized_size;
@@ -221,7 +215,7 @@ static int ntfs_read_block(struct page *page)
}
zblock = (init_size + blocksize - 1) >> blocksize_bits;
- /* Loop through all the buffers in the page. */
+ /* Loop through all the buffers in the folio. */
rl = NULL;
nr = i = 0;
do {
@@ -299,7 +293,7 @@ lock_retry_remap:
if (!err)
err = -EIO;
bh->b_blocknr = -1;
- SetPageError(page);
+ folio_set_error(folio);
ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, "
"offset 0x%x because its location on "
@@ -312,13 +306,13 @@ lock_retry_remap:
/*
* Either iblock was outside lblock limits or
* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
- * of the page and set the buffer uptodate.
+ * of the folio and set the buffer uptodate.
*/
handle_hole:
bh->b_blocknr = -1UL;
clear_buffer_mapped(bh);
handle_zblock:
- zero_user(page, i * blocksize, blocksize);
+ folio_zero_range(folio, i * blocksize, blocksize);
if (likely(!err))
set_buffer_uptodate(bh);
} while (i++, iblock++, (bh = bh->b_this_page) != head);
@@ -349,11 +343,11 @@ handle_zblock:
return 0;
}
/* No i/o was scheduled on any of the buffers. */
- if (likely(!PageError(page)))
- SetPageUptodate(page);
+ if (likely(!folio_test_error(folio)))
+ folio_mark_uptodate(folio);
else /* Signal synchronous i/o error. */
nr = -EIO;
- unlock_page(page);
+ folio_unlock(folio);
return nr;
}
@@ -433,7 +427,7 @@ retry_readpage:
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
/* Normal, non-resident data stream. */
- return ntfs_read_block(page);
+ return ntfs_read_block(folio);
}
/*
* Attribute is resident, implying it is not compressed or encrypted.
@@ -507,28 +501,29 @@ err_out:
#ifdef NTFS_RW
/**
- * ntfs_write_block - write a @page to the backing store
- * @page: page cache page to write out
+ * ntfs_write_block - write a @folio to the backing store
+ * @folio: page cache folio to write out
* @wbc: writeback control structure
*
- * This function is for writing pages belonging to non-resident, non-mst
+ * This function is for writing folios belonging to non-resident, non-mst
* protected attributes to their backing store.
*
- * For a page with buffers, map and write the dirty buffers asynchronously
- * under page writeback. For a page without buffers, create buffers for the
- * page, then proceed as above.
+ * For a folio with buffers, map and write the dirty buffers asynchronously
+ * under folio writeback. For a folio without buffers, create buffers for the
+ * folio, then proceed as above.
*
- * If a page doesn't have buffers the page dirty state is definitive. If a page
- * does have buffers, the page dirty state is just a hint, and the buffer dirty
- * state is definitive. (A hint which has rules: dirty buffers against a clean
- * page is illegal. Other combinations are legal and need to be handled. In
- * particular a dirty page containing clean buffers for example.)
+ * If a folio doesn't have buffers the folio dirty state is definitive. If
+ * a folio does have buffers, the folio dirty state is just a hint,
+ * and the buffer dirty state is definitive. (A hint which has rules:
+ * dirty buffers against a clean folio is illegal. Other combinations are
+ * legal and need to be handled. In particular a dirty folio containing
+ * clean buffers for example.)
*
* Return 0 on success and -errno on error.
*
* Based on ntfs_read_block() and __block_write_full_folio().
*/
-static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
+static int ntfs_write_block(struct folio *folio, struct writeback_control *wbc)
{
VCN vcn;
LCN lcn;
@@ -546,41 +541,29 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
bool need_end_writeback;
unsigned char blocksize_bits;
- vi = page->mapping->host;
+ vi = folio->mapping->host;
ni = NTFS_I(vi);
vol = ni->vol;
ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx.", ni->mft_no, ni->type, page->index);
+ "0x%lx.", ni->mft_no, ni->type, folio->index);
BUG_ON(!NInoNonResident(ni));
BUG_ON(NInoMstProtected(ni));
blocksize = vol->sb->s_blocksize;
blocksize_bits = vol->sb->s_blocksize_bits;
- if (!page_has_buffers(page)) {
- BUG_ON(!PageUptodate(page));
- create_empty_buffers(page, blocksize,
+ head = folio_buffers(folio);
+ if (!head) {
+ BUG_ON(!folio_test_uptodate(folio));
+ head = create_empty_buffers(folio, blocksize,
(1 << BH_Uptodate) | (1 << BH_Dirty));
- if (unlikely(!page_has_buffers(page))) {
- ntfs_warning(vol->sb, "Error allocating page "
- "buffers. Redirtying page so we try "
- "again later.");
- /*
- * Put the page back on mapping->dirty_pages, but leave
- * its buffers' dirty state as-is.
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
}
- bh = head = page_buffers(page);
- BUG_ON(!bh);
+ bh = head;
/* NOTE: Different naming scheme to ntfs_read_block()! */
- /* The first block in the page. */
- block = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
+ /* The first block in the folio. */
+ block = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
i_size = i_size_read(vi);
@@ -597,14 +580,14 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
* Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
- * then we just miss that fact, and the page stays dirty.
+ * then we just miss that fact, and the folio stays dirty.
*
* Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
*/
/*
- * Loop through all the buffers in the page, mapping all the dirty
+ * Loop through all the buffers in the folio, mapping all the dirty
* buffers to disk addresses and handling any aliases from the
* underlying block device's mapping.
*/
@@ -616,13 +599,13 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
if (unlikely(block >= dblock)) {
/*
* Mapped buffers outside i_size will occur, because
- * this page can be outside i_size when there is a
+ * this folio can be outside i_size when there is a
* truncate in progress. The contents of such buffers
* were zeroed by ntfs_writepage().
*
* FIXME: What about the small race window where
* ntfs_writepage() has not done any clearing because
- * the page was within i_size but before we get here,
+ * the folio was within i_size but before we get here,
* vmtruncate() modifies i_size?
*/
clear_buffer_dirty(bh);
@@ -638,38 +621,38 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
if (unlikely((block >= iblock) &&
(initialized_size < i_size))) {
/*
- * If this page is fully outside initialized
- * size, zero out all pages between the current
- * initialized size and the current page. Just
+ * If this folio is fully outside initialized
+ * size, zero out all folios between the current
+ * initialized size and the current folio. Just
* use ntfs_read_folio() to do the zeroing
* transparently.
*/
if (block > iblock) {
// TODO:
- // For each page do:
- // - read_cache_page()
- // Again for each page do:
- // - wait_on_page_locked()
- // - Check (PageUptodate(page) &&
- // !PageError(page))
+ // For each folio do:
+ // - read_cache_folio()
+ // Again for each folio do:
+ // - wait_on_folio_locked()
+ // - Check (folio_test_uptodate(folio) &&
+ // !folio_test_error(folio))
// Update initialized size in the attribute and
// in the inode.
- // Again, for each page do:
+ // Again, for each folio do:
// block_dirty_folio();
- // put_page()
+ // folio_put()
// We don't need to wait on the writes.
// Update iblock.
}
/*
- * The current page straddles initialized size. Zero
+ * The current folio straddles initialized size. Zero
* all non-uptodate buffers and set them uptodate (and
* dirty?). Note, there aren't any non-uptodate buffers
- * if the page is uptodate.
- * FIXME: For an uptodate page, the buffers may need to
+ * if the folio is uptodate.
+ * FIXME: For an uptodate folio, the buffers may need to
* be written out because they were not initialized on
* disk before.
*/
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
// TODO:
// Zero any non-uptodate buffers up to i_size.
// Set them uptodate and dirty.
@@ -727,14 +710,14 @@ lock_retry_remap:
unsigned long *bpos, *bend;
/* Check if the buffer is zero. */
- kaddr = kmap_atomic(page);
- bpos = (unsigned long *)(kaddr + bh_offset(bh));
- bend = (unsigned long *)((u8*)bpos + blocksize);
+ kaddr = kmap_local_folio(folio, bh_offset(bh));
+ bpos = (unsigned long *)kaddr;
+ bend = (unsigned long *)(kaddr + blocksize);
do {
if (unlikely(*bpos))
break;
} while (likely(++bpos < bend));
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
if (bpos == bend) {
/*
* Buffer is zero and sparse, no need to write
@@ -774,7 +757,7 @@ lock_retry_remap:
if (err == -ENOENT || lcn == LCN_ENOENT) {
bh->b_blocknr = -1;
clear_buffer_dirty(bh);
- zero_user(page, bh_offset(bh), blocksize);
+ folio_zero_range(folio, bh_offset(bh), blocksize);
set_buffer_uptodate(bh);
err = 0;
continue;
@@ -801,7 +784,7 @@ lock_retry_remap:
bh = head;
/* Just an optimization, so ->read_folio() is not called later. */
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
int uptodate = 1;
do {
if (!buffer_uptodate(bh)) {
@@ -811,7 +794,7 @@ lock_retry_remap:
}
} while ((bh = bh->b_this_page) != head);
if (uptodate)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
/* Setup all mapped, dirty buffers for async write i/o. */
@@ -826,7 +809,7 @@ lock_retry_remap:
} else if (unlikely(err)) {
/*
* For the error case. The buffer may have been set
- * dirty during attachment to a dirty page.
+ * dirty during attachment to a dirty folio.
*/
if (err != -ENOMEM)
clear_buffer_dirty(bh);
@@ -839,20 +822,20 @@ lock_retry_remap:
err = 0;
else if (err == -ENOMEM) {
ntfs_warning(vol->sb, "Error allocating memory. "
- "Redirtying page so we try again "
+ "Redirtying folio so we try again "
"later.");
/*
- * Put the page back on mapping->dirty_pages, but
+ * Put the folio back on mapping->dirty_pages, but
* leave its buffer's dirty state as-is.
*/
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
err = 0;
} else
- SetPageError(page);
+ folio_set_error(folio);
}
- BUG_ON(PageWriteback(page));
- set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio); /* Keeps try_to_free_buffers() away. */
/* Submit the prepared buffers for i/o. */
need_end_writeback = true;
@@ -864,11 +847,11 @@ lock_retry_remap:
}
bh = next;
} while (bh != head);
- unlock_page(page);
+ folio_unlock(folio);
- /* If no i/o was started, need to end_page_writeback(). */
+ /* If no i/o was started, need to end writeback here. */
if (unlikely(need_end_writeback))
- end_page_writeback(page);
+ folio_end_writeback(folio);
ntfs_debug("Done.");
return err;
@@ -1337,8 +1320,9 @@ done:
*/
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
loff_t i_size;
- struct inode *vi = page->mapping->host;
+ struct inode *vi = folio->mapping->host;
ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
char *addr;
ntfs_attr_search_ctx *ctx = NULL;
@@ -1347,14 +1331,13 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
int err;
retry_writepage:
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
i_size = i_size_read(vi);
- /* Is the page fully outside i_size? (truncate in progress) */
- if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
+ /* Is the folio fully outside i_size? (truncate in progress) */
+ if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >>
PAGE_SHIFT)) {
- struct folio *folio = page_folio(page);
/*
- * The page may have dirty, unmapped buffers. Make them
+ * The folio may have dirty, unmapped buffers. Make them
* freeable here, so the page does not leak.
*/
block_invalidate_folio(folio, 0, folio_size(folio));
@@ -1373,7 +1356,7 @@ retry_writepage:
if (ni->type != AT_INDEX_ALLOCATION) {
/* If file is encrypted, deny access, just like NT4. */
if (NInoEncrypted(ni)) {
- unlock_page(page);
+ folio_unlock(folio);
BUG_ON(ni->type != AT_DATA);
ntfs_debug("Denying write access to encrypted file.");
return -EACCES;
@@ -1384,14 +1367,14 @@ retry_writepage:
BUG_ON(ni->name_len);
// TODO: Implement and replace this with
// return ntfs_write_compressed_block(page);
- unlock_page(page);
+ folio_unlock(folio);
ntfs_error(vi->i_sb, "Writing to compressed files is "
"not supported yet. Sorry.");
return -EOPNOTSUPP;
}
// TODO: Implement and remove this check.
if (NInoNonResident(ni) && NInoSparse(ni)) {
- unlock_page(page);
+ folio_unlock(folio);
ntfs_error(vi->i_sb, "Writing to sparse files is not "
"supported yet. Sorry.");
return -EOPNOTSUPP;
@@ -1400,34 +1383,34 @@ retry_writepage:
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
/* We have to zero every time due to mmap-at-end-of-file. */
- if (page->index >= (i_size >> PAGE_SHIFT)) {
- /* The page straddles i_size. */
- unsigned int ofs = i_size & ~PAGE_MASK;
- zero_user_segment(page, ofs, PAGE_SIZE);
+ if (folio->index >= (i_size >> PAGE_SHIFT)) {
+ /* The folio straddles i_size. */
+ unsigned int ofs = i_size & (folio_size(folio) - 1);
+ folio_zero_segment(folio, ofs, folio_size(folio));
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
return ntfs_write_mst_block(page, wbc);
/* Normal, non-resident data stream. */
- return ntfs_write_block(page, wbc);
+ return ntfs_write_block(folio, wbc);
}
/*
* Attribute is resident, implying it is not compressed, encrypted, or
* mst protected. This also means the attribute is smaller than an mft
- * record and hence smaller than a page, so can simply return error on
- * any pages with index above 0. Note the attribute can actually be
+ * record and hence smaller than a folio, so can simply return error on
+ * any folios with index above 0. Note the attribute can actually be
* marked compressed but if it is resident the actual data is not
* compressed so we are ok to ignore the compressed flag here.
*/
- BUG_ON(page_has_buffers(page));
- BUG_ON(!PageUptodate(page));
- if (unlikely(page->index > 0)) {
- ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
- "Aborting write.", page->index);
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
- end_page_writeback(page);
+ BUG_ON(folio_buffers(folio));
+ BUG_ON(!folio_test_uptodate(folio));
+ if (unlikely(folio->index > 0)) {
+ ntfs_error(vi->i_sb, "BUG()! folio->index (0x%lx) > 0. "
+ "Aborting write.", folio->index);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+ folio_end_writeback(folio);
return -EIO;
}
if (!NInoAttr(ni))
@@ -1460,12 +1443,12 @@ retry_writepage:
if (unlikely(err))
goto err_out;
/*
- * Keep the VM happy. This must be done otherwise the radix-tree tag
- * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
+ * Keep the VM happy. This must be done otherwise
+ * PAGECACHE_TAG_DIRTY remains set even though the folio is clean.
*/
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) {
@@ -1480,18 +1463,18 @@ retry_writepage:
/* Shrinking cannot fail. */
BUG_ON(err);
}
- addr = kmap_atomic(page);
- /* Copy the data from the page to the mft record. */
+ addr = kmap_local_folio(folio, 0);
+ /* Copy the data from the folio to the mft record. */
memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
addr, attr_len);
- /* Zero out of bounds area in the page cache page. */
- memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
- kunmap_atomic(addr);
- flush_dcache_page(page);
+ /* Zero out of bounds area in the page cache folio. */
+ memset(addr + attr_len, 0, folio_size(folio) - attr_len);
+ kunmap_local(addr);
+ flush_dcache_folio(folio);
flush_dcache_mft_record_page(ctx->ntfs_ino);
- /* We are done with the page. */
- end_page_writeback(page);
+ /* We are done with the folio. */
+ folio_end_writeback(folio);
/* Finally, mark the mft record dirty, so it gets written back. */
mark_mft_record_dirty(ctx->ntfs_ino);
ntfs_attr_put_search_ctx(ctx);
@@ -1502,18 +1485,18 @@ err_out:
ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
"page so we try again later.");
/*
- * Put the page back on mapping->dirty_pages, but leave its
+ * Put the folio back on mapping->dirty_pages, but leave its
* buffers' dirty state as-is.
*/
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
err = 0;
} else {
ntfs_error(vi->i_sb, "Resident attribute write failed with "
"error %i.", err);
- SetPageError(page);
+ folio_set_error(folio);
NVolSetErrors(ni->vol);
}
- unlock_page(page);
+ folio_unlock(folio);
if (ctx)
ntfs_attr_put_search_ctx(ctx);
if (m)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index cbc545999cfe..297c0b9db621 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -567,7 +567,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
LCN lcn;
s64 bh_pos, vcn_len, end, initialized_size;
sector_t lcn_block;
- struct page *page;
+ struct folio *folio;
struct inode *vi;
ntfs_inode *ni, *base_ni = NULL;
ntfs_volume *vol;
@@ -601,20 +601,6 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
(long long)pos, bytes);
blocksize = vol->sb->s_blocksize;
blocksize_bits = vol->sb->s_blocksize_bits;
- u = 0;
- do {
- page = pages[u];
- BUG_ON(!page);
- /*
- * create_empty_buffers() will create uptodate/dirty buffers if
- * the page is uptodate/dirty.
- */
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, blocksize, 0);
- if (unlikely(!page_has_buffers(page)))
- return -ENOMEM;
- }
- } while (++u < nr_pages);
rl_write_locked = false;
rl = NULL;
err = 0;
@@ -626,14 +612,21 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
end = pos + bytes;
cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
/*
- * Loop over each page and for each page over each buffer. Use goto to
+ * Loop over each buffer in each folio. Use goto to
* reduce indentation.
*/
u = 0;
-do_next_page:
- page = pages[u];
- bh_pos = (s64)page->index << PAGE_SHIFT;
- bh = head = page_buffers(page);
+do_next_folio:
+ folio = page_folio(pages[u]);
+ bh_pos = folio_pos(folio);
+ head = folio_buffers(folio);
+ if (!head)
+ /*
+ * create_empty_buffers() will create uptodate/dirty
+ * buffers if the folio is uptodate/dirty.
+ */
+ head = create_empty_buffers(folio, blocksize, 0);
+ bh = head;
do {
VCN cdelta;
s64 bh_end;
@@ -653,15 +646,15 @@ do_next_page:
if (buffer_uptodate(bh))
continue;
/*
- * The buffer is not uptodate. If the page is uptodate
+ * The buffer is not uptodate. If the folio is uptodate
* set the buffer uptodate and otherwise ignore it.
*/
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
set_buffer_uptodate(bh);
continue;
}
/*
- * Neither the page nor the buffer are uptodate. If
+ * Neither the folio nor the buffer are uptodate. If
* the buffer is only partially being written to, we
* need to read it in before the write, i.e. now.
*/
@@ -679,7 +672,7 @@ do_next_page:
ntfs_submit_bh_for_read(bh);
*wait_bh++ = bh;
} else {
- zero_user(page, bh_offset(bh),
+ folio_zero_range(folio, bh_offset(bh),
blocksize);
set_buffer_uptodate(bh);
}
@@ -706,7 +699,7 @@ map_buffer_cached:
(bh_cofs >> blocksize_bits);
set_buffer_mapped(bh);
/*
- * If the page is uptodate so is the buffer. If the
+ * If the folio is uptodate so is the buffer. If the
* buffer is fully outside the write, we ignore it if
* it was already allocated and we mark it dirty so it
* gets written out if we allocated it. On the other
@@ -714,7 +707,7 @@ map_buffer_cached:
* marking it dirty we set buffer_new so we can do
* error recovery.
*/
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
if (unlikely(was_hole)) {
@@ -754,7 +747,8 @@ map_buffer_cached:
ntfs_submit_bh_for_read(bh);
*wait_bh++ = bh;
} else {
- zero_user(page, bh_offset(bh),
+ folio_zero_range(folio,
+ bh_offset(bh),
blocksize);
set_buffer_uptodate(bh);
}
@@ -773,7 +767,7 @@ map_buffer_cached:
*/
if (bh_end <= pos || bh_pos >= end) {
if (!buffer_uptodate(bh)) {
- zero_user(page, bh_offset(bh),
+ folio_zero_range(folio, bh_offset(bh),
blocksize);
set_buffer_uptodate(bh);
}
@@ -786,7 +780,7 @@ map_buffer_cached:
u8 *kaddr;
unsigned pofs;
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_folio(folio, 0);
if (bh_pos < pos) {
pofs = bh_pos & ~PAGE_MASK;
memset(kaddr + pofs, 0, pos - bh_pos);
@@ -795,8 +789,8 @@ map_buffer_cached:
pofs = end & ~PAGE_MASK;
memset(kaddr + pofs, 0, bh_end - end);
}
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ kunmap_local(kaddr);
+ flush_dcache_folio(folio);
}
continue;
}
@@ -809,11 +803,12 @@ map_buffer_cached:
initialized_size = ni->allocated_size;
read_unlock_irqrestore(&ni->size_lock, flags);
if (bh_pos > initialized_size) {
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh)) {
- zero_user(page, bh_offset(bh), blocksize);
+ folio_zero_range(folio, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
continue;
@@ -927,17 +922,17 @@ rl_not_mapped_enoent:
bh->b_blocknr = -1;
/*
* If the buffer is uptodate we skip it. If it
- * is not but the page is uptodate, we can set
- * the buffer uptodate. If the page is not
+ * is not but the folio is uptodate, we can set
+ * the buffer uptodate. If the folio is not
* uptodate, we can clear the buffer and set it
* uptodate. Whether this is worthwhile is
* debatable and this could be removed.
*/
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh)) {
- zero_user(page, bh_offset(bh),
+ folio_zero_range(folio, bh_offset(bh),
blocksize);
set_buffer_uptodate(bh);
}
@@ -1167,7 +1162,7 @@ rl_not_mapped_enoent:
} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
/* If there are no errors, do the next page. */
if (likely(!err && ++u < nr_pages))
- goto do_next_page;
+ goto do_next_folio;
/* If there are no errors, release the runlist lock if we took it. */
if (likely(!err)) {
if (unlikely(rl_write_locked)) {
@@ -1185,9 +1180,8 @@ rl_not_mapped_enoent:
bh = *--wait_bh;
wait_on_buffer(bh);
if (likely(buffer_uptodate(bh))) {
- page = bh->b_page;
- bh_pos = ((s64)page->index << PAGE_SHIFT) +
- bh_offset(bh);
+ folio = bh->b_folio;
+ bh_pos = folio_pos(folio) + bh_offset(bh);
/*
* If the buffer overflows the initialized size, need
* to zero the overflowing region.
@@ -1197,7 +1191,7 @@ rl_not_mapped_enoent:
if (likely(bh_pos < initialized_size))
ofs = initialized_size - bh_pos;
- zero_user_segment(page, bh_offset(bh) + ofs,
+ folio_zero_segment(folio, bh_offset(bh) + ofs,
blocksize);
}
} else /* if (unlikely(!buffer_uptodate(bh))) */
@@ -1324,21 +1318,20 @@ rl_not_mapped_enoent:
u = 0;
end = bh_cpos << vol->cluster_size_bits;
do {
- page = pages[u];
- bh = head = page_buffers(page);
+ folio = page_folio(pages[u]);
+ bh = head = folio_buffers(folio);
do {
if (u == nr_pages &&
- ((s64)page->index << PAGE_SHIFT) +
- bh_offset(bh) >= end)
+ folio_pos(folio) + bh_offset(bh) >= end)
break;
if (!buffer_new(bh))
continue;
clear_buffer_new(bh);
if (!buffer_uptodate(bh)) {
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
else {
- zero_user(page, bh_offset(bh),
+ folio_zero_range(folio, bh_offset(bh),
blocksize);
set_buffer_uptodate(bh);
}
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index ad4a70b5d432..a5a30a24ce5d 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -187,7 +187,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
struct buffer_head *head, *bh;
u32 bh_next, bh_off, to;
sector_t iblock;
- struct page *page;
+ struct folio *folio;
for (; idx < idx_end; idx += 1, from = 0) {
page_off = (loff_t)idx << PAGE_SHIFT;
@@ -195,16 +195,17 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
PAGE_SIZE;
iblock = page_off >> inode->i_blkbits;
- page = find_or_create_page(mapping, idx,
- mapping_gfp_constraint(mapping,
- ~__GFP_FS));
- if (!page)
- return -ENOMEM;
+ folio = __filemap_get_folio(mapping, idx,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio, blocksize, 0);
- bh = head = page_buffers(page);
+ bh = head;
bh_off = 0;
do {
bh_next = bh_off + blocksize;
@@ -220,14 +221,14 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
}
/* Ok, it's mapped. Make sure it's up-to-date. */
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
err = bh_read(bh, 0);
if (err < 0) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto out;
}
}
@@ -237,10 +238,10 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
} while (bh_off = bh_next, iblock += 1,
head != (bh = bh->b_this_page));
- zero_user_segment(page, from, to);
+ folio_zero_segment(folio, from, to);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
cond_resched();
}
out:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 6ab03494fc6e..ba790219d528 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -568,10 +568,10 @@ static void ocfs2_clear_page_regions(struct page *page,
* read-in the blocks at the tail of our file. Avoid reading them by
* testing i_size against each block offset.
*/
-static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
+static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio,
unsigned int block_start)
{
- u64 offset = page_offset(page) + block_start;
+ u64 offset = folio_pos(folio) + block_start;
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
return 1;
@@ -593,15 +593,16 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new)
{
+ struct folio *folio = page_folio(page);
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
unsigned int block_end, block_start;
unsigned int bsize = i_blocksize(inode);
- if (!page_has_buffers(page))
- create_empty_buffers(page, bsize, 0);
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio, bsize, 0);
- head = page_buffers(page);
for (bh = head, block_start = 0; bh != head || !block_start;
bh = bh->b_this_page, block_start += bsize) {
block_end = block_start + bsize;
@@ -613,7 +614,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
* they may belong to unallocated clusters.
*/
if (block_start >= to || block_end <= from) {
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
continue;
}
@@ -630,11 +631,11 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
clean_bdev_bh_alias(bh);
}
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_new(bh) &&
- ocfs2_should_read_blk(inode, page, block_start) &&
+ ocfs2_should_read_blk(inode, folio, block_start) &&
(block_start < from || block_end > to)) {
bh_read_nowait(bh, 0);
*wait_bh++=bh;
@@ -668,7 +669,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
if (block_start >= to)
break;
- zero_user(page, block_start, bh->b_size);
+ folio_zero_range(folio, block_start, bh->b_size);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 23fc24d16b31..6422e569b080 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -546,7 +546,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
* and explicitly excluded physical ranges.
*/
if (!page || PageOffline(page) ||
- is_page_hwpoison(page) || !pfn_is_ram(pfn)) {
+ is_page_hwpoison(page) || !pfn_is_ram(pfn) ||
+ pfn_is_unaccepted_memory(pfn)) {
if (iov_iter_zero(tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9191248f2dac..b55dbc70287b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -188,7 +188,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
/* procfs dentries and inodes don't require IO to create */
- s->s_shrink.seeks = 0;
+ s->s_shrink->seeks = 0;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1593940ca01e..4abd51053f76 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -20,6 +20,8 @@
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
+#include <linux/minmax.h>
+#include <linux/overflow.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -1761,11 +1763,737 @@ static int pagemap_release(struct inode *inode, struct file *file)
return 0;
}
+#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
+ PAGE_IS_FILE | PAGE_IS_PRESENT | \
+ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
+ PAGE_IS_HUGE)
+#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
+
+struct pagemap_scan_private {
+ struct pm_scan_arg arg;
+ unsigned long masks_of_interest, cur_vma_category;
+ struct page_region *vec_buf;
+ unsigned long vec_buf_len, vec_buf_index, found_pages;
+ struct page_region __user *vec_out;
+};
+
+static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
+ struct vm_area_struct *vma,
+ unsigned long addr, pte_t pte)
+{
+ unsigned long categories = 0;
+
+ if (pte_present(pte)) {
+ struct page *page;
+
+ categories |= PAGE_IS_PRESENT;
+ if (!pte_uffd_wp(pte))
+ categories |= PAGE_IS_WRITTEN;
+
+ if (p->masks_of_interest & PAGE_IS_FILE) {
+ page = vm_normal_page(vma, addr, pte);
+ if (page && !PageAnon(page))
+ categories |= PAGE_IS_FILE;
+ }
+
+ if (is_zero_pfn(pte_pfn(pte)))
+ categories |= PAGE_IS_PFNZERO;
+ } else if (is_swap_pte(pte)) {
+ swp_entry_t swp;
+
+ categories |= PAGE_IS_SWAPPED;
+ if (!pte_swp_uffd_wp_any(pte))
+ categories |= PAGE_IS_WRITTEN;
+
+ if (p->masks_of_interest & PAGE_IS_FILE) {
+ swp = pte_to_swp_entry(pte);
+ if (is_pfn_swap_entry(swp) &&
+ !PageAnon(pfn_swap_entry_to_page(swp)))
+ categories |= PAGE_IS_FILE;
+ }
+ }
+
+ return categories;
+}
+
+/*
+ * Mark the PTE at @addr in @vma as userfaultfd write-protected.
+ *
+ * A present PTE gets the uffd-wp bit through the modify_prot start/commit
+ * pair, a swap PTE gets the swap form of the bit, and any other entry is
+ * replaced by a PTE_MARKER_UFFD_WP marker.
+ */
+static void make_uffd_wp_pte(struct vm_area_struct *vma,
+			     unsigned long addr, pte_t *pte)
+{
+	pte_t ptent = ptep_get(pte);
+
+	if (pte_present(ptent)) {
+		pte_t old_pte;
+
+		/*
+		 * Derive the new entry from the value returned by
+		 * ptep_modify_prot_start(), not from the earlier ptep_get()
+		 * snapshot.  Bits that changed in the entry between the two
+		 * reads (e.g. access/dirty) would otherwise be thrown away
+		 * when the stale snapshot is committed.
+		 */
+		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		ptent = pte_mkuffd_wp(old_pte);
+		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_mkuffd_wp(ptent);
+		set_pte_at(vma->vm_mm, addr, pte, ptent);
+	} else {
+		set_pte_at(vma->vm_mm, addr, pte,
+			   make_pte_marker(PTE_MARKER_UFFD_WP));
+	}
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Compute the PAGE_IS_* category bits for a huge PMD mapping.
+ *
+ * PAGE_IS_HUGE is always set.  PAGE_IS_FILE is only evaluated when the
+ * request's masks reference it (p->masks_of_interest), since it needs a
+ * page lookup.
+ */
+static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
+					  struct vm_area_struct *vma,
+					  unsigned long addr, pmd_t pmd)
+{
+	unsigned long categories = PAGE_IS_HUGE;
+
+	if (pmd_present(pmd)) {
+		struct page *page;
+
+		categories |= PAGE_IS_PRESENT;
+		if (!pmd_uffd_wp(pmd))
+			categories |= PAGE_IS_WRITTEN;
+
+		if (p->masks_of_interest & PAGE_IS_FILE) {
+			page = vm_normal_page_pmd(vma, addr, pmd);
+			if (page && !PageAnon(page))
+				categories |= PAGE_IS_FILE;
+		}
+
+		/*
+		 * is_zero_pfn() tests for the PTE-sized zero page and does
+		 * not recognise the huge zero page, so test the PMD itself.
+		 */
+		if (is_huge_zero_pmd(pmd))
+			categories |= PAGE_IS_PFNZERO;
+	} else if (is_swap_pmd(pmd)) {
+		swp_entry_t swp;
+
+		categories |= PAGE_IS_SWAPPED;
+		if (!pmd_swp_uffd_wp(pmd))
+			categories |= PAGE_IS_WRITTEN;
+
+		if (p->masks_of_interest & PAGE_IS_FILE) {
+			swp = pmd_to_swp_entry(pmd);
+			if (is_pfn_swap_entry(swp) &&
+			    !PageAnon(pfn_swap_entry_to_page(swp)))
+				categories |= PAGE_IS_FILE;
+		}
+	}
+
+	return categories;
+}
+
+static void make_uffd_wp_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old, pmd = *pmdp;
+
+ if (pmd_present(pmd)) {
+ old = pmdp_invalidate_ad(vma, addr, pmdp);
+ pmd = pmd_mkuffd_wp(old);
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ pmd = pmd_swp_mkuffd_wp(pmd);
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ }
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long pagemap_hugetlb_category(pte_t pte)
+{
+ unsigned long categories = PAGE_IS_HUGE;
+
+ /*
+ * According to pagemap_hugetlb_range(), file-backed HugeTLB
+ * page cannot be swapped. So PAGE_IS_FILE is not checked for
+ * swapped pages.
+ */
+ if (pte_present(pte)) {
+ categories |= PAGE_IS_PRESENT;
+ if (!huge_pte_uffd_wp(pte))
+ categories |= PAGE_IS_WRITTEN;
+ if (!PageAnon(pte_page(pte)))
+ categories |= PAGE_IS_FILE;
+ if (is_zero_pfn(pte_pfn(pte)))
+ categories |= PAGE_IS_PFNZERO;
+ } else if (is_swap_pte(pte)) {
+ categories |= PAGE_IS_SWAPPED;
+ if (!pte_swp_uffd_wp_any(pte))
+ categories |= PAGE_IS_WRITTEN;
+ }
+
+ return categories;
+}
+
+static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t ptent)
+{
+ unsigned long psize;
+
+ if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent))
+ return;
+
+ psize = huge_page_size(hstate_vma(vma));
+
+ if (is_hugetlb_entry_migration(ptent))
+ set_huge_pte_at(vma->vm_mm, addr, ptep,
+ pte_swp_mkuffd_wp(ptent), psize);
+ else if (!huge_pte_none(ptent))
+ huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
+ huge_pte_mkuffd_wp(ptent));
+ else
+ set_huge_pte_at(vma->vm_mm, addr, ptep,
+ make_pte_marker(PTE_MARKER_UFFD_WP), psize);
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+/*
+ * Undo the most recent pagemap_scan_output() for [addr, end): trim or clear
+ * the current output region and take the pages back out of found_pages.
+ */
+static void pagemap_scan_backout_range(struct pagemap_scan_private *p,
+				       unsigned long addr, unsigned long end)
+{
+	struct page_region *cur_buf;
+
+	/*
+	 * Without an output buffer pagemap_scan_output() recorded nothing
+	 * and did not touch found_pages, so there is nothing to undo (and
+	 * no buffer to index into).
+	 */
+	if (!p->vec_buf)
+		return;
+
+	cur_buf = &p->vec_buf[p->vec_buf_index];
+
+	if (cur_buf->start != addr)
+		cur_buf->end = addr;
+	else
+		cur_buf->start = cur_buf->end = 0;
+
+	p->found_pages -= (end - addr) / PAGE_SIZE;
+}
+#endif
+
+static bool pagemap_scan_is_interesting_page(unsigned long categories,
+ const struct pagemap_scan_private *p)
+{
+ categories ^= p->arg.category_inverted;
+ if ((categories & p->arg.category_mask) != p->arg.category_mask)
+ return false;
+ if (p->arg.category_anyof_mask && !(categories & p->arg.category_anyof_mask))
+ return false;
+
+ return true;
+}
+
+static bool pagemap_scan_is_interesting_vma(unsigned long categories,
+ const struct pagemap_scan_private *p)
+{
+ unsigned long required = p->arg.category_mask & PAGE_IS_WPALLOWED;
+
+ categories ^= p->arg.category_inverted;
+ if ((categories & required) != required)
+ return false;
+
+ return true;
+}
+
+/*
+ * Per-VMA filter for the scan walk.
+ *
+ * Returns 0 to walk the VMA (after recording its category bits in
+ * p->cur_vma_category), a positive value to have the VMA skipped, or
+ * -EPERM when PM_SCAN_CHECK_WPASYNC was requested and the VMA is not set
+ * up for async uffd write-protection.
+ */
+static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
+				  struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long vma_category = 0;
+	bool wp_allowed = userfaultfd_wp_async(vma) &&
+			  userfaultfd_wp_use_markers(vma);
+
+	if (!wp_allowed) {
+		/* The caller asked for a hard failure on such VMAs. */
+		if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
+			return -EPERM;
+
+		/*
+		 * Write-protection was requested but this VMA cannot take
+		 * it: skip the VMA instead of write-protecting memory that
+		 * was never registered for it.
+		 */
+		if (p->arg.flags & PM_SCAN_WP_MATCHING)
+			return 1;
+
+		/* Read-only scan: fall through to the remaining checks. */
+	}
+
+	if (vma->vm_flags & VM_PFNMAP)
+		return 1;
+
+	if (wp_allowed)
+		vma_category |= PAGE_IS_WPALLOWED;
+
+	if (!pagemap_scan_is_interesting_vma(vma_category, p))
+		return 1;
+
+	p->cur_vma_category = vma_category;
+
+	return 0;
+}
+
+static bool pagemap_scan_push_range(unsigned long categories,
+ struct pagemap_scan_private *p,
+ unsigned long addr, unsigned long end)
+{
+ struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
+
+ /*
+ * When there is no output buffer provided at all, the sentinel values
+ * won't match here. There is no other way for `cur_buf->end` to be
+ * non-zero other than it being non-empty.
+ */
+ if (addr == cur_buf->end && categories == cur_buf->categories) {
+ cur_buf->end = end;
+ return true;
+ }
+
+ if (cur_buf->end) {
+ if (p->vec_buf_index >= p->vec_buf_len - 1)
+ return false;
+
+ cur_buf = &p->vec_buf[++p->vec_buf_index];
+ }
+
+ cur_buf->start = addr;
+ cur_buf->end = end;
+ cur_buf->categories = categories;
+
+ return true;
+}
+
+static int pagemap_scan_output(unsigned long categories,
+ struct pagemap_scan_private *p,
+ unsigned long addr, unsigned long *end)
+{
+ unsigned long n_pages, total_pages;
+ int ret = 0;
+
+ if (!p->vec_buf)
+ return 0;
+
+ categories &= p->arg.return_mask;
+
+ n_pages = (*end - addr) / PAGE_SIZE;
+ if (check_add_overflow(p->found_pages, n_pages, &total_pages) ||
+ total_pages > p->arg.max_pages) {
+ size_t n_too_much = total_pages - p->arg.max_pages;
+ *end -= n_too_much * PAGE_SIZE;
+ n_pages -= n_too_much;
+ ret = -ENOSPC;
+ }
+
+ if (!pagemap_scan_push_range(categories, p, addr, *end)) {
+ *end = addr;
+ n_pages = 0;
+ ret = -ENOSPC;
+ }
+
+ p->found_pages += n_pages;
+ if (ret)
+ p->arg.walk_end = *end;
+
+ return ret;
+}
+
+static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct pagemap_scan_private *p = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ unsigned long categories;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (!ptl)
+ return -ENOENT;
+
+ categories = p->cur_vma_category |
+ pagemap_thp_category(p, vma, start, *pmd);
+
+ if (!pagemap_scan_is_interesting_page(categories, p))
+ goto out_unlock;
+
+ ret = pagemap_scan_output(categories, p, start, &end);
+ if (start == end)
+ goto out_unlock;
+
+ if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+ goto out_unlock;
+ if (~categories & PAGE_IS_WRITTEN)
+ goto out_unlock;
+
+ /*
+ * Break huge page into small pages if the WP operation
+ * needs to be performed on a portion of the huge page.
+ */
+ if (end != start + HPAGE_SIZE) {
+ spin_unlock(ptl);
+ split_huge_pmd(vma, pmd, start);
+ pagemap_scan_backout_range(p, start, end);
+ /* Report as if there was no THP */
+ return -ENOENT;
+ }
+
+ make_uffd_wp_pmd(vma, start, pmd);
+ flush_tlb_range(vma, start, end);
+out_unlock:
+ spin_unlock(ptl);
+ return ret;
+#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
+ return -ENOENT;
+#endif
+}
+
+/*
+ * pmd_entry callback of the scan walk.
+ *
+ * Huge PMDs are handed to pagemap_scan_thp_entry(); when that reports
+ * -ENOENT the range is processed PTE by PTE under the page table lock,
+ * using one of three loops:
+ *  - write-protect only (PM_SCAN_WP_MATCHING set and no output vector),
+ *  - the "written pages only" request (category_mask == return_mask ==
+ *    PAGE_IS_WRITTEN with no inverted/anyof masks),
+ *  - the general case, which classifies each PTE.
+ *
+ * Returns the last pagemap_scan_output() result (0 or an error such as
+ * -ENOSPC when output had to stop early), or 0 with ACTION_AGAIN set when
+ * the page table could not be mapped.
+ */
+static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
+				  unsigned long end, struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long addr, flush_end = 0;
+	pte_t *pte, *start_pte;
+	spinlock_t *ptl;
+	int ret;
+
+	arch_enter_lazy_mmu_mode();
+
+	ret = pagemap_scan_thp_entry(pmd, start, end, walk);
+	if (ret != -ENOENT) {
+		arch_leave_lazy_mmu_mode();
+		return ret;
+	}
+
+	ret = 0;
+	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
+	if (!pte) {
+		arch_leave_lazy_mmu_mode();
+		walk->action = ACTION_AGAIN;
+		return 0;
+	}
+
+	/*
+	 * Only take the write-protect-everything shortcut when the caller
+	 * actually asked for write-protection; a scan without an output
+	 * vector must not modify page tables on its own.
+	 */
+	if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
+		/* Fast path for performing exclusive WP */
+		for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
+			pte_t ptent = ptep_get(pte);
+
+			/*
+			 * pte_uffd_wp() is only meaningful for present
+			 * PTEs; non-present entries carry the bit in its
+			 * swap form.
+			 */
+			if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
+			    pte_swp_uffd_wp_any(ptent))
+				continue;
+			make_uffd_wp_pte(vma, addr, pte);
+			if (!flush_end)
+				start = addr;
+			flush_end = addr + PAGE_SIZE;
+		}
+		goto flush_and_return;
+	}
+
+	if (!p->arg.category_anyof_mask && !p->arg.category_inverted &&
+	    p->arg.category_mask == PAGE_IS_WRITTEN &&
+	    p->arg.return_mask == PAGE_IS_WRITTEN) {
+		for (addr = start; addr < end; pte++, addr += PAGE_SIZE) {
+			unsigned long next = addr + PAGE_SIZE;
+			pte_t ptent = ptep_get(pte);
+
+			if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
+			    pte_swp_uffd_wp_any(ptent))
+				continue;
+			ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN,
+						  p, addr, &next);
+			/* Nothing could be recorded for this page: stop. */
+			if (next == addr)
+				break;
+			if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+				continue;
+			make_uffd_wp_pte(vma, addr, pte);
+			if (!flush_end)
+				start = addr;
+			flush_end = next;
+		}
+		goto flush_and_return;
+	}
+
+	for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
+		unsigned long categories = p->cur_vma_category |
+					   pagemap_page_category(p, vma, addr, ptep_get(pte));
+		unsigned long next = addr + PAGE_SIZE;
+
+		if (!pagemap_scan_is_interesting_page(categories, p))
+			continue;
+
+		ret = pagemap_scan_output(categories, p, addr, &next);
+		/* Nothing could be recorded for this page: stop. */
+		if (next == addr)
+			break;
+
+		if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+			continue;
+		if (~categories & PAGE_IS_WRITTEN)
+			continue;
+
+		make_uffd_wp_pte(vma, addr, pte);
+		if (!flush_end)
+			start = addr;
+		flush_end = next;
+	}
+
+flush_and_return:
+	/* start was moved to the first modified page; flush from there. */
+	if (flush_end)
+		flush_tlb_range(vma, start, addr);
+
+	pte_unmap_unlock(start_pte, ptl);
+	arch_leave_lazy_mmu_mode();
+
+	cond_resched();
+	return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
+ unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct pagemap_scan_private *p = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ unsigned long categories;
+ spinlock_t *ptl;
+ int ret = 0;
+ pte_t pte;
+
+ if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
+ /* Go the short route when not write-protecting pages. */
+
+ pte = huge_ptep_get(ptep);
+ categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
+
+ if (!pagemap_scan_is_interesting_page(categories, p))
+ return 0;
+
+ return pagemap_scan_output(categories, p, start, &end);
+ }
+
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
+
+ pte = huge_ptep_get(ptep);
+ categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
+
+ if (!pagemap_scan_is_interesting_page(categories, p))
+ goto out_unlock;
+
+ ret = pagemap_scan_output(categories, p, start, &end);
+ if (start == end)
+ goto out_unlock;
+
+ if (~categories & PAGE_IS_WRITTEN)
+ goto out_unlock;
+
+ if (end != start + HPAGE_SIZE) {
+ /* Partial HugeTLB page WP isn't possible. */
+ pagemap_scan_backout_range(p, start, end);
+ p->arg.walk_end = start;
+ ret = 0;
+ goto out_unlock;
+ }
+
+ make_uffd_wp_huge_pte(vma, start, ptep, pte);
+ flush_hugetlb_tlb_range(vma, start, end);
+
+out_unlock:
+ spin_unlock(ptl);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
+ return ret;
+}
+#else
+#define pagemap_scan_hugetlb_entry NULL
+#endif
+
+static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end,
+ int depth, struct mm_walk *walk)
+{
+ struct pagemap_scan_private *p = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ int ret, err;
+
+ if (!vma || !pagemap_scan_is_interesting_page(p->cur_vma_category, p))
+ return 0;
+
+ ret = pagemap_scan_output(p->cur_vma_category, p, addr, &end);
+ if (addr == end)
+ return ret;
+
+ if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+ return ret;
+
+ err = uffd_wp_range(vma, addr, end - addr, true);
+ if (err < 0)
+ ret = err;
+
+ return ret;
+}
+
+static const struct mm_walk_ops pagemap_scan_ops = {
+ .test_walk = pagemap_scan_test_walk,
+ .pmd_entry = pagemap_scan_pmd_entry,
+ .pte_hole = pagemap_scan_pte_hole,
+ .hugetlb_entry = pagemap_scan_hugetlb_entry,
+};
+
+/*
+ * Copy the PAGEMAP_SCAN argument block from user space and validate it.
+ *
+ * On success @arg holds untagged addresses, a page-aligned end, a cleared
+ * walk_end, and max_pages defaulted to ULONG_MAX when the caller passed 0.
+ *
+ * Returns 0 on success, -EFAULT when the argument block or one of the
+ * described user ranges is not accessible, and -EINVAL for a size
+ * mismatch, unknown flags/categories, an unaligned start, or a vector
+ * length given without a vector.
+ */
+static int pagemap_scan_get_args(struct pm_scan_arg *arg,
+				 unsigned long uarg)
+{
+	if (copy_from_user(arg, (void __user *)uarg, sizeof(*arg)))
+		return -EFAULT;
+
+	if (arg->size != sizeof(struct pm_scan_arg))
+		return -EINVAL;
+
+	/* Validate requested features */
+	if (arg->flags & ~PM_SCAN_FLAGS)
+		return -EINVAL;
+	if ((arg->category_inverted | arg->category_mask |
+	     arg->category_anyof_mask | arg->return_mask) & ~PM_SCAN_CATEGORIES)
+		return -EINVAL;
+
+	arg->start = untagged_addr((unsigned long)arg->start);
+	arg->end = untagged_addr((unsigned long)arg->end);
+	arg->vec = untagged_addr((unsigned long)arg->vec);
+
+	/* Validate memory pointers */
+	if (!IS_ALIGNED(arg->start, PAGE_SIZE))
+		return -EINVAL;
+	if (!access_ok((void __user *)(long)arg->start, arg->end - arg->start))
+		return -EFAULT;
+	if (!arg->vec && arg->vec_len)
+		return -EINVAL;
+	/* Where size_t is 32 bits wide, reject lengths it cannot hold. */
+	if (UINT_MAX == SIZE_MAX && arg->vec_len > SIZE_MAX)
+		return -EINVAL;
+	/*
+	 * vec_len comes straight from user space; a plain multiplication
+	 * could wrap to a small byte count and slip past access_ok().
+	 * size_mul() saturates instead, which access_ok() then rejects.
+	 */
+	if (arg->vec && !access_ok((void __user *)(long)arg->vec,
+				   size_mul(arg->vec_len, sizeof(struct page_region))))
+		return -EFAULT;
+
+	/* Fixup default values */
+	arg->end = ALIGN(arg->end, PAGE_SIZE);
+	arg->walk_end = 0;
+	if (!arg->max_pages)
+		arg->max_pages = ULONG_MAX;
+
+	return 0;
+}
+
+static int pagemap_scan_writeback_args(struct pm_scan_arg *arg,
+ unsigned long uargl)
+{
+ struct pm_scan_arg __user *uarg = (void __user *)uargl;
+
+ if (copy_to_user(&uarg->walk_end, &arg->walk_end, sizeof(arg->walk_end)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
+{
+ if (!p->arg.vec_len)
+ return 0;
+
+ p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
+ p->arg.vec_len);
+ p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
+ GFP_KERNEL);
+ if (!p->vec_buf)
+ return -ENOMEM;
+
+ p->vec_buf->start = p->vec_buf->end = 0;
+ p->vec_out = (struct page_region __user *)(long)p->arg.vec;
+
+ return 0;
+}
+
+static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p)
+{
+ const struct page_region *buf = p->vec_buf;
+ long n = p->vec_buf_index;
+
+ if (!p->vec_buf)
+ return 0;
+
+ if (buf[n].end != buf[n].start)
+ n++;
+
+ if (!n)
+ return 0;
+
+ if (copy_to_user(p->vec_out, buf, n * sizeof(*buf)))
+ return -EFAULT;
+
+ p->arg.vec_len -= n;
+ p->vec_out += n;
+
+ p->vec_buf_index = 0;
+ p->vec_buf_len = min_t(size_t, p->vec_buf_len, p->arg.vec_len);
+ p->vec_buf->start = p->vec_buf->end = 0;
+
+ return n;
+}
+
+/*
+ * Implementation of the PAGEMAP_SCAN ioctl.
+ *
+ * Walks [arg.start, arg.end) of @mm in rounds.  Each round runs the page
+ * walk under mmap_lock (read), then copies the collected regions to the
+ * user vector.  A round that stopped with -ENOSPC is followed by another
+ * one starting at walk_end, unless the user vector or the max_pages
+ * budget is exhausted.
+ *
+ * Returns the number of page_region entries written to user space, or a
+ * negative errno.  walk_end is written back to the user's argument block
+ * in either case.
+ */
+static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
+{
+	struct pagemap_scan_private p = {0};
+	unsigned long walk_start;
+	size_t n_ranges_out = 0;
+	int ret;
+
+	ret = pagemap_scan_get_args(&p.arg, uarg);
+	if (ret)
+		return ret;
+
+	p.masks_of_interest = p.arg.category_mask | p.arg.category_anyof_mask |
+			      p.arg.return_mask;
+	ret = pagemap_scan_init_bounce_buffer(&p);
+	if (ret)
+		return ret;
+
+	for (walk_start = p.arg.start; walk_start < p.arg.end;
+			walk_start = p.arg.walk_end) {
+		struct mmu_notifier_range range;
+		long n_out;
+
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		ret = mmap_read_lock_killable(mm);
+		if (ret)
+			break;
+
+		/*
+		 * Protection change for the range is going to happen.
+		 * Keep the notifier start/end pair inside mmap_lock and
+		 * scoped to this round, so it never spans the user copy
+		 * below or a period where the lock is dropped.
+		 */
+		if (p.arg.flags & PM_SCAN_WP_MATCHING) {
+			mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0,
+						mm, walk_start, p.arg.end);
+			mmu_notifier_invalidate_range_start(&range);
+		}
+
+		ret = walk_page_range(mm, walk_start, p.arg.end,
+				      &pagemap_scan_ops, &p);
+
+		if (p.arg.flags & PM_SCAN_WP_MATCHING)
+			mmu_notifier_invalidate_range_end(&range);
+
+		mmap_read_unlock(mm);
+
+		n_out = pagemap_scan_flush_buffer(&p);
+		if (n_out < 0)
+			ret = n_out;
+		else
+			n_ranges_out += n_out;
+
+		if (ret != -ENOSPC)
+			break;
+
+		if (p.arg.vec_len == 0 || p.found_pages == p.arg.max_pages)
+			break;
+	}
+
+	/* ENOSPC signifies early stop (buffer full) from the walk. */
+	if (!ret || ret == -ENOSPC)
+		ret = n_ranges_out;
+
+	/* The walk_end isn't set when ret is zero */
+	if (!p.arg.walk_end)
+		p.arg.walk_end = p.arg.end;
+	if (pagemap_scan_writeback_args(&p.arg, uarg))
+		ret = -EFAULT;
+
+	kfree(p.vec_buf);
+	return ret;
+}
+
+static long do_pagemap_cmd(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct mm_struct *mm = file->private_data;
+
+ switch (cmd) {
+ case PAGEMAP_SCAN:
+ return do_pagemap_scan(mm, arg);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/* File operations for pagemap: seek/read/open/release plus the do_pagemap_cmd ioctl. */
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
.open = pagemap_open,
.release = pagemap_release,
+ /* Native and compat ioctl callers are served by the same handler. */
+ .unlocked_ioctl = do_pagemap_cmd,
+ .compat_ioctl = do_pagemap_cmd,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
@@ -1945,8 +2673,9 @@ static int show_numa_map(struct seq_file *m, void *v)
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
- struct mempolicy *pol;
char buffer[64];
+ struct mempolicy *pol;
+ pgoff_t ilx;
int nid;
if (!mm)
@@ -1955,7 +2684,7 @@ static int show_numa_map(struct seq_file *m, void *v)
/* Ensure we start with an empty set of numa_maps statistics. */
memset(md, 0, sizeof(*md));
- pol = __get_vma_policy(vma, vma->vm_start);
+ pol = __get_vma_policy(vma, vma->vm_start, &ilx);
if (pol) {
mpol_to_str(buffer, sizeof(buffer), pol);
mpol_cond_put(pol);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 023b91b4e1f0..58b5de081b57 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -803,12 +803,6 @@ dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]));
}
-static struct shrinker dqcache_shrinker = {
- .count_objects = dqcache_shrink_count,
- .scan_objects = dqcache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
-
/*
* Safely release dquot and put reference to dquot.
*/
@@ -2982,6 +2976,7 @@ static int __init dquot_init(void)
{
int i, ret;
unsigned long nr_hash, order;
+ struct shrinker *dqcache_shrinker;
printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
@@ -3016,8 +3011,14 @@ static int __init dquot_init(void)
pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"
" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order));
- if (register_shrinker(&dqcache_shrinker, "dquota-cache"))
- panic("Cannot register dquot shrinker");
+ dqcache_shrinker = shrinker_alloc(0, "dquota-cache");
+ if (!dqcache_shrinker)
+ panic("Cannot allocate dquot shrinker");
+
+ dqcache_shrinker->count_objects = dqcache_shrink_count;
+ dqcache_shrinker->scan_objects = dqcache_shrink_scan;
+
+ shrinker_register(dqcache_shrinker);
return 0;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c8572346556f..1d825459ee6e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2503,10 +2503,10 @@ out:
* start/recovery path as __block_write_full_folio, along with special
* code to handle reiserfs tails.
*/
-static int reiserfs_write_full_page(struct page *page,
+static int reiserfs_write_full_folio(struct folio *folio,
struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
unsigned long end_index = inode->i_size >> PAGE_SHIFT;
int error = 0;
unsigned long block;
@@ -2514,7 +2514,7 @@ static int reiserfs_write_full_page(struct page *page,
struct buffer_head *head, *bh;
int partial = 0;
int nr = 0;
- int checked = PageChecked(page);
+ int checked = folio_test_checked(folio);
struct reiserfs_transaction_handle th;
struct super_block *s = inode->i_sb;
int bh_per_page = PAGE_SIZE / s->s_blocksize;
@@ -2522,47 +2522,46 @@ static int reiserfs_write_full_page(struct page *page,
/* no logging allowed when nonblocking or from PF_MEMALLOC */
if (checked && (current->flags & PF_MEMALLOC)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
return 0;
}
/*
- * The page dirty bit is cleared before writepage is called, which
+ * The folio dirty bit is cleared before writepage is called, which
* means we have to tell create_empty_buffers to make dirty buffers
- * The page really should be up to date at this point, so tossing
+ * The folio really should be up to date at this point, so tossing
* in the BH_Uptodate is just a sanity check.
*/
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, s->s_blocksize,
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio, s->s_blocksize,
(1 << BH_Dirty) | (1 << BH_Uptodate));
- }
- head = page_buffers(page);
/*
- * last page in the file, zero out any contents past the
+ * last folio in the file, zero out any contents past the
* last byte in the file
*/
- if (page->index >= end_index) {
+ if (folio->index >= end_index) {
unsigned last_offset;
last_offset = inode->i_size & (PAGE_SIZE - 1);
- /* no file contents in this page */
- if (page->index >= end_index + 1 || !last_offset) {
- unlock_page(page);
+ /* no file contents in this folio */
+ if (folio->index >= end_index + 1 || !last_offset) {
+ folio_unlock(folio);
return 0;
}
- zero_user_segment(page, last_offset, PAGE_SIZE);
+ folio_zero_segment(folio, last_offset, folio_size(folio));
}
bh = head;
- block = page->index << (PAGE_SHIFT - s->s_blocksize_bits);
+ block = folio->index << (PAGE_SHIFT - s->s_blocksize_bits);
last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
/* first map all the buffers, logging any direct items we find */
do {
if (block > last_block) {
/*
* This can happen when the block size is less than
- * the page size. The corresponding bytes in the page
+ * the folio size. The corresponding bytes in the folio
* were zero filled above
*/
clear_buffer_dirty(bh);
@@ -2589,7 +2588,7 @@ static int reiserfs_write_full_page(struct page *page,
* blocks we're going to log
*/
if (checked) {
- ClearPageChecked(page);
+ folio_clear_checked(folio);
reiserfs_write_lock(s);
error = journal_begin(&th, s, bh_per_page + 1);
if (error) {
@@ -2598,7 +2597,7 @@ static int reiserfs_write_full_page(struct page *page,
}
reiserfs_update_inode_transaction(inode);
}
- /* now go through and lock any dirty buffers on the page */
+ /* now go through and lock any dirty buffers on the folio */
do {
get_bh(bh);
if (!buffer_mapped(bh))
@@ -2619,7 +2618,7 @@ static int reiserfs_write_full_page(struct page *page,
lock_buffer(bh);
} else {
if (!trylock_buffer(bh)) {
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
continue;
}
}
@@ -2636,13 +2635,13 @@ static int reiserfs_write_full_page(struct page *page,
if (error)
goto fail;
}
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
/*
- * since any buffer might be the only dirty buffer on the page,
- * the first submit_bh can bring the page out of writeback.
+ * since any buffer might be the only dirty buffer on the folio,
+ * the first submit_bh can bring the folio out of writeback.
* be careful with the buffers.
*/
do {
@@ -2659,10 +2658,10 @@ static int reiserfs_write_full_page(struct page *page,
done:
if (nr == 0) {
/*
- * if this page only had a direct item, it is very possible for
+ * if this folio only had a direct item, it is very possible for
* no io to be required without there being an error. Or,
* someone else could have locked them and sent them down the
- * pipe without locking the page
+ * pipe without locking the folio
*/
bh = head;
do {
@@ -2673,18 +2672,18 @@ done:
bh = bh->b_this_page;
} while (bh != head);
if (!partial)
- SetPageUptodate(page);
- end_page_writeback(page);
+ folio_mark_uptodate(folio);
+ folio_end_writeback(folio);
}
return error;
fail:
/*
* catches various errors, we need to make sure any valid dirty blocks
- * get to the media. The page is currently locked and not marked for
+ * get to the media. The folio is currently locked and not marked for
* writeback
*/
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
bh = head;
do {
get_bh(bh);
@@ -2694,16 +2693,16 @@ fail:
} else {
/*
* clear any dirty bits that might have come from
- * getting attached to a dirty page
+ * getting attached to a dirty folio
*/
clear_buffer_dirty(bh);
}
bh = bh->b_this_page;
} while (bh != head);
- SetPageError(page);
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
+ folio_set_error(folio);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
@@ -2724,9 +2723,10 @@ static int reiserfs_read_folio(struct file *f, struct folio *folio)
+/*
+ * Page-based wrapper: looks up the folio containing @page, waits via
+ * reiserfs_wait_on_write_block() on the inode's superblock, then hands the
+ * folio and @wbc to reiserfs_write_full_folio() and returns its result.
+ */
static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct folio *folio = page_folio(page);
+ struct inode *inode = folio->mapping->host;
reiserfs_wait_on_write_block(inode->i_sb);
- return reiserfs_write_full_page(page, wbc);
+ return reiserfs_write_full_folio(folio, wbc);
}
static void reiserfs_truncate_failed_write(struct inode *inode)
diff --git a/fs/super.c b/fs/super.c
index c7b452e12e4c..77faad662739 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -178,7 +178,7 @@ static void super_wake(struct super_block *sb, unsigned int flag)
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
- * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
+ * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
@@ -191,7 +191,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
long dentries;
long inodes;
- sb = container_of(shrink, struct super_block, s_shrink);
+ sb = shrink->private_data;
/*
* Deadlock avoidance. We may hold various FS locks, and we don't want
@@ -244,7 +244,7 @@ static unsigned long super_cache_count(struct shrinker *shrink,
struct super_block *sb;
long total_objects = 0;
- sb = container_of(shrink, struct super_block, s_shrink);
+ sb = shrink->private_data;
/*
* We don't call super_trylock_shared() here as it is a scalability
@@ -306,7 +306,7 @@ static void destroy_unused_super(struct super_block *s)
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
- free_prealloced_shrinker(&s->s_shrink);
+ shrinker_free(s->s_shrink);
/* no delays needed */
destroy_super_work(&s->destroy_work);
}
@@ -383,16 +383,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
- s->s_shrink.seeks = DEFAULT_SEEKS;
- s->s_shrink.scan_objects = super_cache_scan;
- s->s_shrink.count_objects = super_cache_count;
- s->s_shrink.batch = 1024;
- s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
- if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name))
+ s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
+ "sb-%s", type->name);
+ if (!s->s_shrink)
goto fail;
- if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
+
+ s->s_shrink->scan_objects = super_cache_scan;
+ s->s_shrink->count_objects = super_cache_count;
+ s->s_shrink->batch = 1024;
+ s->s_shrink->private_data = s;
+
+ if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink))
goto fail;
- if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
+ if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
goto fail;
return s;
@@ -477,7 +480,7 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- unregister_shrinker(&s->s_shrink);
+ shrinker_free(s->s_shrink);
fs->kill_sb(s);
kill_super_notify(s);
@@ -818,7 +821,7 @@ retry:
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(s->s_type);
- register_shrinker_prepared(&s->s_shrink);
+ shrinker_register(s->s_shrink);
return s;
share_extant_sb:
@@ -901,7 +904,7 @@ retry:
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
- register_shrinker_prepared(&s->s_shrink);
+ shrinker_register(s->s_shrink);
return s;
}
EXPORT_SYMBOL(sget);
@@ -1540,7 +1543,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
mutex_unlock(&bdev->bd_fsfreeze_mutex);
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
+ shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name,
sb->s_id);
sb_set_blocksize(sb, block_size(bdev));
return 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 366941d4a18a..0d0478815d4d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -54,11 +54,7 @@ module_param_cb(default_version, &ubifs_default_version_ops, &ubifs_default_vers
static struct kmem_cache *ubifs_inode_slab;
/* UBIFS TNC shrinker description */
-static struct shrinker ubifs_shrinker_info = {
- .scan_objects = ubifs_shrink_scan,
- .count_objects = ubifs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *ubifs_shrinker_info;
/**
* validate_inode - validate inode.
@@ -2373,7 +2369,7 @@ static void inode_slab_ctor(void *obj)
static int __init ubifs_init(void)
{
- int err;
+ int err = -ENOMEM;
BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24);
@@ -2439,10 +2435,15 @@ static int __init ubifs_init(void)
if (!ubifs_inode_slab)
return -ENOMEM;
- err = register_shrinker(&ubifs_shrinker_info, "ubifs-slab");
- if (err)
+ ubifs_shrinker_info = shrinker_alloc(0, "ubifs-slab");
+ if (!ubifs_shrinker_info)
goto out_slab;
+ ubifs_shrinker_info->count_objects = ubifs_shrink_count;
+ ubifs_shrinker_info->scan_objects = ubifs_shrink_scan;
+
+ shrinker_register(ubifs_shrinker_info);
+
err = ubifs_compressors_init();
if (err)
goto out_shrinker;
@@ -2467,7 +2468,7 @@ out_dbg:
dbg_debugfs_exit();
ubifs_compressors_exit();
out_shrinker:
- unregister_shrinker(&ubifs_shrinker_info);
+ shrinker_free(ubifs_shrinker_info);
out_slab:
kmem_cache_destroy(ubifs_inode_slab);
return err;
@@ -2483,7 +2484,7 @@ static void __exit ubifs_exit(void)
dbg_debugfs_exit();
ubifs_sysfs_exit();
ubifs_compressors_exit();
- unregister_shrinker(&ubifs_shrinker_info);
+ shrinker_free(ubifs_shrinker_info);
/*
* Make sure all delayed rcu free inodes are flushed before we
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 2436e3f82147..53c11be2b2c1 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -240,6 +240,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
unsigned int count, sector_t oldb,
sector_t newb, struct page *locked_page)
{
+ struct folio *folio, *locked_folio = page_folio(locked_page);
const unsigned blks_per_page =
1 << (PAGE_SHIFT - inode->i_blkbits);
const unsigned mask = blks_per_page - 1;
@@ -247,42 +248,39 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
pgoff_t index, cur_index, last_index;
unsigned pos, j, lblock;
sector_t end, i;
- struct page *page;
struct buffer_head *head, *bh;
UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n",
inode->i_ino, count,
(unsigned long long)oldb, (unsigned long long)newb);
- BUG_ON(!locked_page);
- BUG_ON(!PageLocked(locked_page));
+ BUG_ON(!folio_test_locked(locked_folio));
- cur_index = locked_page->index;
+ cur_index = locked_folio->index;
end = count + beg;
last_index = end >> (PAGE_SHIFT - inode->i_blkbits);
for (i = beg; i < end; i = (i | mask) + 1) {
index = i >> (PAGE_SHIFT - inode->i_blkbits);
if (likely(cur_index != index)) {
- page = ufs_get_locked_page(mapping, index);
- if (!page)/* it was truncated */
+ folio = ufs_get_locked_folio(mapping, index);
+ if (!folio) /* it was truncated */
continue;
- if (IS_ERR(page)) {/* or EIO */
+ if (IS_ERR(folio)) {/* or EIO */
ufs_error(inode->i_sb, __func__,
"read of page %llu failed\n",
(unsigned long long)index);
continue;
}
} else
- page = locked_page;
+ folio = locked_folio;
- head = page_buffers(page);
+ head = folio_buffers(folio);
bh = head;
pos = i & mask;
for (j = 0; j < pos; ++j)
bh = bh->b_this_page;
-
if (unlikely(index == last_index))
lblock = end & mask;
else
@@ -313,7 +311,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
} while (bh != head);
if (likely(cur_index != index))
- ufs_put_locked_page(page);
+ ufs_put_locked_folio(folio);
}
UFSD("EXIT\n");
}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 338e4b97312f..ebce93b08281 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1063,7 +1063,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned i, end;
sector_t lastfrag;
- struct page *lastpage;
+ struct folio *folio;
struct buffer_head *bh;
u64 phys64;
@@ -1074,18 +1074,17 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
lastfrag--;
- lastpage = ufs_get_locked_page(mapping, lastfrag >>
+ folio = ufs_get_locked_folio(mapping, lastfrag >>
(PAGE_SHIFT - inode->i_blkbits));
- if (IS_ERR(lastpage)) {
- err = -EIO;
- goto out;
- }
-
- end = lastfrag & ((1 << (PAGE_SHIFT - inode->i_blkbits)) - 1);
- bh = page_buffers(lastpage);
- for (i = 0; i < end; ++i)
- bh = bh->b_this_page;
+ if (IS_ERR(folio)) {
+ err = -EIO;
+ goto out;
+ }
+ end = lastfrag & ((1 << (PAGE_SHIFT - inode->i_blkbits)) - 1);
+ bh = folio_buffers(folio);
+ for (i = 0; i < end; ++i)
+ bh = bh->b_this_page;
err = ufs_getfrag_block(inode, lastfrag, bh, 1);
@@ -1101,7 +1100,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
*/
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
- set_page_dirty(lastpage);
+ folio_mark_dirty(folio);
}
if (lastfrag >= UFS_IND_FRAGMENT) {
@@ -1119,7 +1118,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
}
}
out_unlock:
- ufs_put_locked_page(lastpage);
+ ufs_put_locked_folio(folio);
out:
return err;
}
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 08ddf41eaaad..13ba34e6d64f 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -230,42 +230,40 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
}
/**
- * ufs_get_locked_page() - locate, pin and lock a pagecache page, if not exist
+ * ufs_get_locked_folio() - locate, pin and lock a pagecache folio, if not exist
* read it from disk.
* @mapping: the address_space to search
* @index: the page index
*
- * Locates the desired pagecache page, if not exist we'll read it,
+ * Locates the desired pagecache folio, if not exist we'll read it,
* locks it, increments its reference
* count and returns its address.
*
+ * Return: the locked folio; NULL if the folio lost its mapping (truncate)
+ * before it could be locked; an ERR_PTR() if read_mapping_folio() failed.
*/
-
-struct page *ufs_get_locked_page(struct address_space *mapping,
+struct folio *ufs_get_locked_folio(struct address_space *mapping,
pgoff_t index)
{
struct inode *inode = mapping->host;
- struct page *page = find_lock_page(mapping, index);
- if (!page) {
- page = read_mapping_page(mapping, index, NULL);
+ struct folio *folio = filemap_lock_folio(mapping, index);
+ /*
+ * Unlike find_lock_page(), filemap_lock_folio() reports a cache miss
+ * as an ERR_PTR(), never as NULL, so the miss must be detected with
+ * IS_ERR() before falling back to reading the folio in.
+ */
+ if (IS_ERR(folio)) {
+ folio = read_mapping_folio(mapping, index, NULL);
- if (IS_ERR(page)) {
- printk(KERN_ERR "ufs_change_blocknr: "
- "read_mapping_page error: ino %lu, index: %lu\n",
+ if (IS_ERR(folio)) {
+ printk(KERN_ERR "ufs_change_blocknr: read_mapping_folio error: ino %lu, index: %lu\n",
mapping->host->i_ino, index);
- return page;
+ return folio;
}
- lock_page(page);
+ folio_lock(folio);
- if (unlikely(page->mapping == NULL)) {
+ if (unlikely(folio->mapping == NULL)) {
/* Truncate got there first */
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return NULL;
}
}
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
- return page;
+ if (!folio_buffers(folio))
+ create_empty_buffers(folio, 1 << inode->i_blkbits, 0);
+ return folio;
}
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 89247193d96d..0ecd2ed792f5 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -273,15 +273,13 @@ extern void _ubh_ubhcpymem_(struct ufs_sb_private_info *, unsigned char *, struc
extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head *, unsigned char *, unsigned);
/* This functions works with cache pages*/
-extern struct page *ufs_get_locked_page(struct address_space *mapping,
- pgoff_t index);
-static inline void ufs_put_locked_page(struct page *page)
+struct folio *ufs_get_locked_folio(struct address_space *mapping, pgoff_t index);
+/* Unlock @folio, then call folio_put() on it. */
+static inline void ufs_put_locked_folio(struct folio *folio)
{
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
-
/*
* macros and inline function to get important structures from ufs_sb_private_info
*/
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 56eaae9dac1a..ac616cfbacf5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -123,6 +123,11 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
return ctx->features & UFFD_FEATURE_INITIALIZED;
}
+/* True only when @ctx is non-NULL and has UFFD_FEATURE_WP_ASYNC set. */
+static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx)
+{
+ if (!ctx)
+ return false;
+
+ return (ctx->features & UFFD_FEATURE_WP_ASYNC) != 0;
+}
+
/*
* Whether WP_UNPOPULATED is enabled on the uffd context. It is only
* meaningful when userfaultfd_wp()==true on the vma and when it's
@@ -922,20 +927,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
continue;
}
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
- prev = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end,
- new_flags, vma->anon_vma,
- vma->vm_file, vma->vm_pgoff,
- vma_policy(vma),
- NULL_VM_UFFD_CTX, anon_vma_name(vma));
- if (prev) {
- vma = prev;
- } else {
- prev = vma;
- }
+ vma = vma_modify_flags_uffd(&vmi, prev, vma, vma->vm_start,
+ vma->vm_end, new_flags,
+ NULL_VM_UFFD_CTX);
vma_start_write(vma);
userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+
+ prev = vma;
}
mmap_write_unlock(mm);
mmput(mm);
@@ -1325,7 +1325,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool basic_ioctls;
unsigned long start, end, vma_end;
struct vma_iterator vmi;
- pgoff_t pgoff;
+ bool wp_async = userfaultfd_wp_async_ctx(ctx);
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1399,7 +1399,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
/* check not compatible vmas */
ret = -EINVAL;
- if (!vma_can_userfault(cur, vm_flags))
+ if (!vma_can_userfault(cur, vm_flags, wp_async))
goto out_unlock;
/*
@@ -1460,7 +1460,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
for_each_vma_range(vmi, vma, end) {
cond_resched();
- BUG_ON(!vma_can_userfault(vma, vm_flags));
+ BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
@@ -1478,28 +1478,14 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_end = min(end, vma->vm_end);
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
- pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, pgoff,
- vma_policy(vma),
- ((struct vm_userfaultfd_ctx){ ctx }),
- anon_vma_name(vma));
- if (prev) {
- /* vma_merge() invalidated the mas */
- vma = prev;
- goto next;
- }
- if (vma->vm_start < start) {
- ret = split_vma(&vmi, vma, start, 1);
- if (ret)
- break;
- }
- if (vma->vm_end > end) {
- ret = split_vma(&vmi, vma, end, 0);
- if (ret)
- break;
+ vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
+ new_flags,
+ (struct vm_userfaultfd_ctx){ctx});
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ break;
}
- next:
+
/*
* In the vma_merge() successful mprotect-like case 8:
* the next vma was merged into the current one and
@@ -1561,7 +1547,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
struct vma_iterator vmi;
- pgoff_t pgoff;
+ bool wp_async = userfaultfd_wp_async_ctx(ctx);
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1615,7 +1601,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* provides for more strict behavior to notice
* unregistration errors.
*/
- if (!vma_can_userfault(cur, cur->vm_flags))
+ if (!vma_can_userfault(cur, cur->vm_flags, wp_async))
goto out_unlock;
found = true;
@@ -1631,7 +1617,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
for_each_vma_range(vmi, vma, end) {
cond_resched();
- BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
+ BUG_ON(!vma_can_userfault(vma, vma->vm_flags, wp_async));
/*
* Nothing to do: this vma is already registered into this
@@ -1664,26 +1650,13 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
uffd_wp_range(vma, start, vma_end - start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
- pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, pgoff,
- vma_policy(vma),
- NULL_VM_UFFD_CTX, anon_vma_name(vma));
- if (prev) {
- vma = prev;
- goto next;
- }
- if (vma->vm_start < start) {
- ret = split_vma(&vmi, vma, start, 1);
- if (ret)
- break;
- }
- if (vma->vm_end > end) {
- ret = split_vma(&vmi, vma, end, 0);
- if (ret)
- break;
+ vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
+ new_flags, NULL_VM_UFFD_CTX);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ break;
}
- next:
+
/*
* In the vma_merge() successful mprotect-like case 8:
* the next vma was merged into the current one and
@@ -2018,6 +1991,11 @@ out:
return ret;
}
+/* Apply userfaultfd_wp_async_ctx() to the uffd context recorded on @vma. */
+bool userfaultfd_wp_async(struct vm_area_struct *vma)
+{
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ return userfaultfd_wp_async_ctx(ctx);
+}
+
static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
@@ -2051,6 +2029,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
goto err_out;
+
+ /* WP_ASYNC relies on WP_UNPOPULATED, choose it unconditionally */
+ if (features & UFFD_FEATURE_WP_ASYNC)
+ features |= UFFD_FEATURE_WP_UNPOPULATED;
+
/* report all available features and ioctls to userland */
uffdio_api.features = UFFD_API_FEATURES;
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
@@ -2063,6 +2046,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
#ifndef CONFIG_PTE_MARKER_UFFD_WP
uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 003e157241da..545c7991b9b5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1913,8 +1913,7 @@ xfs_buftarg_shrink_scan(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_buftarg *btp = container_of(shrink,
- struct xfs_buftarg, bt_shrinker);
+ struct xfs_buftarg *btp = shrink->private_data;
LIST_HEAD(dispose);
unsigned long freed;
@@ -1936,8 +1935,7 @@ xfs_buftarg_shrink_count(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_buftarg *btp = container_of(shrink,
- struct xfs_buftarg, bt_shrinker);
+ struct xfs_buftarg *btp = shrink->private_data;
return list_lru_shrink_count(&btp->bt_lru, sc);
}
@@ -1945,7 +1943,7 @@ void
xfs_free_buftarg(
struct xfs_buftarg *btp)
{
- unregister_shrinker(&btp->bt_shrinker);
+ shrinker_free(btp->bt_shrinker);
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
percpu_counter_destroy(&btp->bt_io_count);
list_lru_destroy(&btp->bt_lru);
@@ -2029,13 +2027,17 @@ xfs_alloc_buftarg(
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
goto error_lru;
- btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
- btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
- btp->bt_shrinker.seeks = DEFAULT_SEEKS;
- btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
- if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s",
- mp->m_super->s_id))
+ btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s",
+ mp->m_super->s_id);
+ if (!btp->bt_shrinker)
goto error_pcpu;
+
+ btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
+ btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
+ btp->bt_shrinker->private_data = btp;
+
+ shrinker_register(btp->bt_shrinker);
+
return btp;
error_pcpu:
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index ada9d310b7d3..c86e16419656 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -109,7 +109,7 @@ typedef struct xfs_buftarg {
size_t bt_logical_sectormask;
/* LRU control structures */
- struct shrinker bt_shrinker;
+ struct shrinker *bt_shrinker;
struct list_lru bt_lru;
struct percpu_counter bt_io_count;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 3c210ac83713..dba514a2c84d 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2165,8 +2165,7 @@ xfs_inodegc_shrinker_count(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
- m_inodegc_shrinker);
+ struct xfs_mount *mp = shrink->private_data;
struct xfs_inodegc *gc;
int cpu;
@@ -2187,8 +2186,7 @@ xfs_inodegc_shrinker_scan(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
- m_inodegc_shrinker);
+ struct xfs_mount *mp = shrink->private_data;
struct xfs_inodegc *gc;
int cpu;
bool no_items = true;
@@ -2224,13 +2222,19 @@ int
xfs_inodegc_register_shrinker(
struct xfs_mount *mp)
{
- struct shrinker *shrink = &mp->m_inodegc_shrinker;
+ mp->m_inodegc_shrinker = shrinker_alloc(SHRINKER_NONSLAB,
+ "xfs-inodegc:%s",
+ mp->m_super->s_id);
+ if (!mp->m_inodegc_shrinker)
+ return -ENOMEM;
+
+ mp->m_inodegc_shrinker->count_objects = xfs_inodegc_shrinker_count;
+ mp->m_inodegc_shrinker->scan_objects = xfs_inodegc_shrinker_scan;
+ mp->m_inodegc_shrinker->seeks = 0;
+ mp->m_inodegc_shrinker->batch = XFS_INODEGC_SHRINKER_BATCH;
+ mp->m_inodegc_shrinker->private_data = mp;
- shrink->count_objects = xfs_inodegc_shrinker_count;
- shrink->scan_objects = xfs_inodegc_shrinker_scan;
- shrink->seeks = 0;
- shrink->flags = SHRINKER_NONSLAB;
- shrink->batch = XFS_INODEGC_SHRINKER_BATCH;
+ shrinker_register(mp->m_inodegc_shrinker);
- return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id);
+ return 0;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0a0fd19573d8..aed5be5508fe 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1021,7 +1021,7 @@ xfs_mountfs(
out_log_dealloc:
xfs_log_mount_cancel(mp);
out_inodegc_shrinker:
- unregister_shrinker(&mp->m_inodegc_shrinker);
+ shrinker_free(mp->m_inodegc_shrinker);
out_fail_wait:
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
xfs_buftarg_drain(mp->m_logdev_targp);
@@ -1104,7 +1104,7 @@ xfs_unmountfs(
#if defined(DEBUG)
xfs_errortag_clearall(mp);
#endif
- unregister_shrinker(&mp->m_inodegc_shrinker);
+ shrinker_free(mp->m_inodegc_shrinker);
xfs_free_perag(mp);
xfs_errortag_del(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d19cca099bc3..219681d29fbc 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,7 @@ typedef struct xfs_mount {
atomic_t m_agirotor; /* last ag dir inode alloced */
/* Memory shrinker to throttle and reprioritize inodegc */
- struct shrinker m_inodegc_shrinker;
+ struct shrinker *m_inodegc_shrinker;
/*
* Workqueue item so that we can coalesce multiple inode flush attempts
* into a single flush.
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 086e78a6143a..94a7932ac570 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -504,8 +504,7 @@ xfs_qm_shrink_scan(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_quotainfo *qi = container_of(shrink,
- struct xfs_quotainfo, qi_shrinker);
+ struct xfs_quotainfo *qi = shrink->private_data;
struct xfs_qm_isolate isol;
unsigned long freed;
int error;
@@ -539,8 +538,7 @@ xfs_qm_shrink_count(
struct shrinker *shrink,
struct shrink_control *sc)
{
- struct xfs_quotainfo *qi = container_of(shrink,
- struct xfs_quotainfo, qi_shrinker);
+ struct xfs_quotainfo *qi = shrink->private_data;
return list_lru_shrink_count(&qi->qi_lru, sc);
}
@@ -680,15 +678,18 @@ xfs_qm_init_quotainfo(
if (XFS_IS_PQUOTA_ON(mp))
xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);
- qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
- qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
- qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
- qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
-
- error = register_shrinker(&qinf->qi_shrinker, "xfs-qm:%s",
- mp->m_super->s_id);
- if (error)
+ qinf->qi_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-qm:%s",
+ mp->m_super->s_id);
+ if (!qinf->qi_shrinker) {
+ error = -ENOMEM;
goto out_free_inos;
+ }
+
+ qinf->qi_shrinker->count_objects = xfs_qm_shrink_count;
+ qinf->qi_shrinker->scan_objects = xfs_qm_shrink_scan;
+ qinf->qi_shrinker->private_data = qinf;
+
+ shrinker_register(qinf->qi_shrinker);
return 0;
@@ -718,7 +719,7 @@ xfs_qm_destroy_quotainfo(
qi = mp->m_quotainfo;
ASSERT(qi != NULL);
- unregister_shrinker(&qi->qi_shrinker);
+ shrinker_free(qi->qi_shrinker);
list_lru_destroy(&qi->qi_lru);
xfs_qm_destroy_quotainos(qi);
mutex_destroy(&qi->qi_tree_lock);
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9683f0457d19..d5c9fc4ba591 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -63,7 +63,7 @@ struct xfs_quotainfo {
struct xfs_def_quota qi_usr_default;
struct xfs_def_quota qi_grp_default;
struct xfs_def_quota qi_prj_default;
- struct shrinker qi_shrinker;
+ struct shrinker *qi_shrinker;
/* Minimum and maximum quota expiration timestamp values. */
time64_t qi_expiry_min;