diff options
Diffstat (limited to 'fs')
39 files changed, 279 insertions, 215 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index effafc3e0ced..7ec022e9361a 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1422,11 +1422,31 @@ alloc_done: wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &wp->ptrs, ob, i) + open_bucket_for_each(c, &wp->ptrs, ob, i) { + /* + * Ensure proper write alignment - either due to misaligned + * bucket sizes (from buggy bcachefs-tools), or writes that mix + * logical/physical alignment: + */ + struct bch_dev *ca = ob_dev(c, ob); + u64 offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free; + unsigned align = round_up(offset, block_sectors(c)) - offset; + + ob->sectors_free = max_t(int, 0, ob->sectors_free - align); + wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + } wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + /* Did alignment use up space in an open_bucket? */ + if (unlikely(!wp->sectors_free)) { + bch2_alloc_sectors_done(c, wp); + goto retry; + } + BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5fd4a58d2ad2..60782f3e5aec 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -41,6 +41,7 @@ void bch2_btree_node_io_unlock(struct btree *b) clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } @@ -1400,6 +1401,7 @@ start: printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1595,6 +1597,7 @@ fsck_err: printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1721,6 +1724,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; @@ -2061,8 +2065,10 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); - else + else { + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); + } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) @@ -2175,6 +2181,7 @@ static void btree_node_write_endio(struct bio *bio) } clear_btree_node_write_in_flight_inner(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_io_complete_wq, &wb->work); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 8d75b27a1418..af1532de4a37 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -44,6 +44,7 @@ static inline void bucket_unlock(struct bucket *b) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 62d27e04d763..51893e1ee874 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -160,6 +160,7 @@ static inline void gc_stripe_unlock(struct gc_stripe *s) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR); } diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a615e4852ded..d9ebffa5b3a2 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -269,7 +269,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, journal_shutdown) \ + x(EROFS, journal_shutdown) \ x(EIO, journal_flush_err) \ x(EIO, journal_write_err) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index dca2b8425cc0..e597fb9c9823 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1056,8 +1056,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, struct bch_extent_ptr *ptr) { - if (!opts->promote_target || - !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + unsigned target = opts->promote_target ?: opts->foreground_target; + + if (target && !bch2_dev_in_target(c, ptr->dev, target)) return false; struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 113db85b6ef9..b6801861c66f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2502,10 +2502,9 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); - /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops - */ + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2567,10 +2566,6 @@ got_sb: sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; - #ifdef CONFIG_UNICODE sb->s_encoding = c->cf_encoding; #endif diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 63cdf885c9e2..ded18a94ed02 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -19,6 +19,7 @@ #include <linux/ioprio.h> #include <linux/string_choices.h> +#include <linux/sched/sysctl.h> void bch2_journal_pos_from_member_info_set(struct bch_fs *c) { @@ -1262,7 +1263,8 @@ int bch2_journal_read(struct bch_fs *c, degraded = true; } - closure_sync(&jlist.cl); + while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; if (jlist.ret) return jlist.ret; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ea670c3c43d8..976464d8a695 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -266,10 +266,11 @@ out: static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - bool ret; - spin_lock(&j->lock); - ret = ja->discard_idx != ja->dirty_idx_ondisk; + unsigned min_free = max(4, ja->nr / 8); + + bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < min_free && + ja->discard_idx != ja->dirty_idx_ondisk; spin_unlock(&j->lock); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fc396b9fa754..dfdbb9259985 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -784,7 +784,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto err; ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "flushing btree write buffer"); if (ret) goto err; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 27943082c093..84a37d971ffd 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -377,6 +377,11 @@ void bch2_fs_read_only(struct bch_fs *c) bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); } else { + /* Make sure error counts/counters are persisted */ + mutex_lock(&c->sb_lock); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + bch_verbose(c, "done going read-only, filesystem not clean"); } } diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index dea73bc1cb51..314a24d15d4e 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki struct stdio_buf *buf = &stdio->output; unsigned long flags; ssize_t ret; - again: + if (stdio->done) + return -EPIPE; + spin_lock_irqsave(&buf->lock, flags); ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args); spin_unlock_irqrestore(&buf->lock, flags); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7f8ee5d48a4..7f11ef559be6 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -606,7 +606,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8515c31f563b..13bdd60da3c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3508,8 +3508,8 @@ static void btree_clear_folio_dirty_tag(struct folio *folio) ASSERT(folio_test_locked(folio)); xa_lock_irq(&folio->mapping->i_pages); if (!folio_test_dirty(folio)) - __xa_clear_mark(&folio->mapping->i_pages, - folio_index(folio), PAGECACHE_TAG_DIRTY); + __xa_clear_mark(&folio->mapping->i_pages, folio->index, + PAGECACHE_TAG_DIRTY); xa_unlock_irq(&folio->mapping->i_pages); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2e261892c7bc..f5b28b5c4908 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -298,6 +298,8 @@ static inline int __pure num_extent_pages(const struct extent_buffer *eb) */ static inline int __pure num_extent_folios(const struct extent_buffer *eb) { + if (!eb->folios[0]) + return 0; if (folio_order(eb->folios[0])) return 1; return num_extent_pages(eb); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2c5edcee9450..c3b2e29e3e01 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1541,8 +1541,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, u64 extent_gen; int ret; - if (unlikely(!extent_root)) { - btrfs_err(fs_info, "no valid extent root for scrub"); + if (unlikely(!extent_root || !csum_root)) { + btrfs_err(fs_info, "no valid extent or csum root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8c21c55be53..8e6b6fed7429 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -733,82 +733,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/<fd>", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1513,23 +1437,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1574,8 +1487,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1584,7 +1496,6 @@ free_disk_super: error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 4fa0a0121288..60c7cc4c105c 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -150,10 +150,10 @@ io_retry: io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index cadec6b1b554..da6ee7c39290 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -357,7 +357,6 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, - Opt_err }; static const struct constant_table erofs_param_cache_strategy[] = { diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5c061aaeeb45..b8e6b76c23d5 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -79,9 +79,6 @@ struct z_erofs_pcluster { /* L: whether partial decompression or not */ bool partial; - /* L: indicate several pageofs_outs or not */ - bool multibases; - /* L: whether extra buffer allocations are best-effort */ bool besteffort; @@ -1046,8 +1043,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, break; erofs_onlinefolio_split(folio); - if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - f->pcl->multibases = true; if (f->pcl->length < offset + end - map->m_la) { f->pcl->length = offset + end - map->m_la; f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; @@ -1093,7 +1088,6 @@ struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; - /* pages with the longest decompressed length for deduplication */ struct page **decompressed_pages; /* pages to keep the compressed data */ @@ -1102,6 +1096,8 @@ struct z_erofs_backend { struct list_head decompressed_secondary_bvecs; struct page **pagepool; unsigned int onstack_used, nr_pages; + /* indicate if temporary copies should be preserved for later use */ + bool keepxcpy; }; struct z_erofs_bvec_item { @@ -1112,18 +1108,20 @@ struct z_erofs_bvec_item { static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { + int poff = bvec->offset + be->pcl->pageofs_out; struct z_erofs_bvec_item *item; - unsigned int pgnr; - - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && - (bvec->end == PAGE_SIZE || - bvec->offset + bvec->end == be->pcl->length)) { - pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; - DBG_BUGON(pgnr >= be->nr_pages); - if (!be->decompressed_pages[pgnr]) { - be->decompressed_pages[pgnr] = bvec->page; + struct page **page; + + if (!(poff & ~PAGE_MASK) && (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { + DBG_BUGON((poff >> PAGE_SHIFT) >= be->nr_pages); + page = be->decompressed_pages + (poff >> PAGE_SHIFT); + if (!*page) { + *page = bvec->page; return; } + } else { + be->keepxcpy = true; } /* (cold path) one pcluster is requested multiple times */ @@ -1289,7 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .alg = pcl->algorithmformat, .inplace_io = overlapped, .partial_decoding = pcl->partial, - .fillgaps = pcl->multibases, + .fillgaps = be->keepxcpy, .gfp = pcl->besteffort ? GFP_KERNEL : GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); @@ -1346,7 +1344,6 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) pcl->length = 0; pcl->partial = true; - pcl->multibases = false; pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; diff --git a/fs/namespace.c b/fs/namespace.c index 98a5cd756e9a..1b466c54a357 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -787,15 +787,11 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); - smp_mb(); // see mntput_no_expire() + smp_mb(); // see mntput_no_expire() and do_umount() if (likely(!read_seqretry(&mount_lock, seq))) return 0; - if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { - mnt_add_count(mnt, -1); - return 1; - } lock_mount_hash(); - if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { mnt_add_count(mnt, -1); unlock_mount_hash(); return 1; @@ -2048,6 +2044,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { + smp_mb(); // paired with __legitimize_mnt() shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { @@ -3560,7 +3557,8 @@ static int can_move_mount_beneath(const struct path *from, * @mnt_from itself. This defeats the whole purpose of mounting * @mnt_from beneath @mnt_to. */ - if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) + if (check_mnt(mnt_from) && + propagation_would_overmount(parent_mnt_to, mnt_from, mp)) return -EINVAL; return 0; @@ -3718,15 +3716,14 @@ static int do_move_mount(struct path *old_path, if (err) goto out; - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; - /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); if (attached) put_mountpoint(old_mp); out: + if (is_anon_ns(ns)) + ns->mntns_flags &= ~MNTNS_PROPAGATING; unlock_mount(mp); if (!err) { if (attached) { diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index cb01ea81724d..d0bcf744c553 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8ac85b548c7..821cb7874685 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(folios[numfolios])) { ret = PTR_ERR(folios[numfolios]); mlog_errno(ret); + folios[numfolios] = NULL; goto out; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c7a9729dc9d0..e5f58ff2175f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } -void ocfs2_recovery_exit(struct ocfs2_super *osb) +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) { - struct ocfs2_recovery_map *rm; - - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); +} + +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1569,27 +1603,29 @@ bail: ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0..6397170f302f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51c52768132d..6aaa94c554c1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +385,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2956d888c131..e272429da3db 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -677,7 +674,6 @@ out_put: break; } out: - up_read(&sb->s_umount); kfree(rec); return status; } @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7b483f0de2a..6ac4dcd54588 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. */ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b481b834857d..bcf2ed4a8631 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8bb5022f3082..3d2533950bae 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ diff --git a/fs/pnode.c b/fs/pnode.c index 7a062a5de10e..fb77427df39e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -150,7 +150,7 @@ static struct mount *propagation_next(struct mount *m, struct mount *origin) { /* are there any slaves of this mount? */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { @@ -174,7 +174,7 @@ static struct mount *skip_propagation_subtree(struct mount *m, * Advance m such that propagation_next will not return * the slaves of m. */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) m = last_slave(m); return m; @@ -185,7 +185,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) while (1) { while (1) { struct mount *next; - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); next = next_peer(m); if (m->mnt_group_id == origin->mnt_group_id) { @@ -226,11 +226,15 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp) struct mount *child; int type; /* skip ones added by this propagate_mnt() */ - if (IS_MNT_PROPAGATED(m)) + if (IS_MNT_NEW(m)) return 0; - /* skip if mountpoint isn't covered by it */ + /* skip if mountpoint isn't visible in m */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; + /* skip if m is in the anon_ns we are emptying */ + if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING) + return 0; + if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { @@ -380,9 +384,6 @@ bool propagation_would_overmount(const struct mount *from, if (!IS_MNT_SHARED(from)) return false; - if (IS_MNT_PROPAGATED(to)) - return false; - if (to->mnt.mnt_root != mp->m_dentry) return false; diff --git a/fs/pnode.h b/fs/pnode.h index ddafe0d087ca..34b6247af01d 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_PROPAGATED(m) (!(m)->mnt_ns || ((m)->mnt_ns->mntns_flags & MNTNS_PROPAGATING)) +#define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1b..240d82c6f908 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, { struct cached_fid *cfid; - spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { if (!strcmp(cfid->path, path)) { /* @@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. */ if (!cfid->time || !cfid->has_lease) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); - spin_unlock(&cfids->cfid_list_lock); return cfid; } } if (lookup_only) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } if (cfids->num_entries >= max_cached_dirs) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid = init_cached_dir(path); if (cfid == NULL) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid->cfids = cfids; @@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, */ cfid->has_lease = true; - spin_unlock(&cfids->cfid_list_lock); return cfid; } @@ -187,8 +180,10 @@ replay_again: if (!utf16_path) return -ENOMEM; + spin_lock(&cfids->cfid_list_lock); cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); if (cfid == NULL) { + spin_unlock(&cfids->cfid_list_lock); kfree(utf16_path); return -ENOENT; } @@ -197,7 +192,6 @@ replay_again: * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease && cfid->time) { spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 57d9bfbadd97..2a3e46b8e15a 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -666,6 +666,8 @@ finished: /* smb2_parse_contexts() fills idata->fi.IndexNumber */ rc = smb2_parse_contexts(server, &rsp_iov[0], &oparms->fid->epoch, oparms->fid->lease_key, &oplock, &idata->fi, NULL); + if (rc) + cifs_dbg(VFS, "rc: %d parsing context of compound op\n", rc); } for (i = 0; i < num_cmds; i++) { diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 81a29857b1e3..03f606afad93 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 46aa08245742..f2a2be8467c6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 391d07da586c..482eba0f4dc1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -426,6 +426,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1f8fa3468173..dfed6fce8904 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; + + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d80f94346199..22f4bf956ba1 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_copy->copy))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, @@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, @@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, @@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_poison->updated))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, @@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx, user_uffdio_move = (struct uffdio_move __user *) arg; - if (atomic_read(&ctx->mmap_changing)) - return -EAGAIN; + ret = -EAGAIN; + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_move->move))) + return -EFAULT; + goto out; + } if (copy_from_user(&uffdio_move, user_uffdio_move, /* don't copy "move" last field */ |