Diffstat (limited to 'fs')
-rw-r--r--  fs/bcachefs/alloc_foreground.c   |   2
-rw-r--r--  fs/bcachefs/alloc_foreground.h   |   4
-rw-r--r--  fs/bcachefs/bcachefs_format.h    |  81
-rw-r--r--  fs/bcachefs/bkey_methods.c       |  24
-rw-r--r--  fs/bcachefs/btree_iter.c         |   7
-rw-r--r--  fs/bcachefs/dirent.c             |  16
-rw-r--r--  fs/bcachefs/dirent.h             |  15
-rw-r--r--  fs/bcachefs/error.c              |  17
-rw-r--r--  fs/bcachefs/error.h              |   1
-rw-r--r--  fs/bcachefs/fs-ioctl.c           | 217
-rw-r--r--  fs/bcachefs/fs-ioctl.h           |  75
-rw-r--r--  fs/bcachefs/fs.c                 | 469
-rw-r--r--  fs/bcachefs/inode.h              |   8
-rw-r--r--  fs/bcachefs/inode_format.h       |   9
-rw-r--r--  fs/bcachefs/journal.c            |  36
-rw-r--r--  fs/bcachefs/journal.h            |   7
-rw-r--r--  fs/bcachefs/journal_reclaim.c    |   5
-rw-r--r--  fs/bcachefs/movinggc.c           |   7
-rw-r--r--  fs/bcachefs/movinggc.h           |   9
-rw-r--r--  fs/bcachefs/namei.c              |   4
-rw-r--r--  fs/bcachefs/opts.h               |   5
-rw-r--r--  fs/bcachefs/rebalance.c          |  11
-rw-r--r--  fs/bcachefs/rebalance.h          |   2
-rw-r--r--  fs/bcachefs/recovery.c           |  10
-rw-r--r--  fs/bcachefs/recovery_passes.c    |  70
-rw-r--r--  fs/bcachefs/snapshot.c           |   2
-rw-r--r--  fs/bcachefs/str_hash.h           |   5
-rw-r--r--  fs/bcachefs/super-io.c           |   3
-rw-r--r--  fs/bcachefs/super.c              | 150
-rw-r--r--  fs/bcachefs/sysfs.c              |   7
-rw-r--r--  fs/bcachefs/tests.c              |   4
-rw-r--r--  fs/bcachefs/util.h               |  38
-rw-r--r--  fs/buffer.c                      |  73
-rw-r--r--  fs/ceph/inode.c                  |   2
-rw-r--r--  fs/ext4/ialloc.c                 |   3
-rw-r--r--  fs/ext4/mballoc.c                |   3
-rw-r--r--  fs/file.c                        |   2
-rw-r--r--  fs/jbd2/revoke.c                 |  15
-rw-r--r--  fs/namespace.c                   |  69
-rw-r--r--  fs/ocfs2/journal.c               |   2
-rw-r--r--  fs/splice.c                      |   2
-rw-r--r--  fs/xattr.c                       |   4
-rw-r--r--  fs/xfs/xfs_zone_gc.c             |  10
43 files changed, 852 insertions, 653 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 7c930ef77380..effafc3e0ced 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1425,6 +1425,8 @@ alloc_done:
open_bucket_for_each(c, &wp->ptrs, ob, i)
wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
+ wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c));
+
BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
return 0;
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 69ec6a012898..4c1e33cf57c0 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
unsigned i;
open_bucket_for_each(c, &wp->ptrs, ob, i)
- ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+ ob_push(c, ob->sectors_free < block_sectors(c)
+ ? &ptrs
+ : &keep, ob);
wp->ptrs = keep;
mutex_unlock(&wp->lock);
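
Note: taken together, the two hunks above enforce a single invariant: a write point never advertises free space that is not a whole number of filesystem blocks, and an open bucket with less than one block left is retired rather than kept on the write point. A minimal standalone sketch of that invariant; block_sectors and the bucket fields here are stand-ins, not the real bcachefs types:

#include <assert.h>
#include <stdio.h>

#define rounddown(x, y) (((x) / (y)) * (y))

int main(void)
{
	unsigned block_sectors = 8;	/* e.g. 4096-byte blocks, 512-byte sectors */
	unsigned sectors_free  = 21;	/* min over all open buckets */

	/* mirrors the alloc_foreground.c hunk: clamp to a block boundary */
	sectors_free = rounddown(sectors_free, block_sectors);
	assert(sectors_free % block_sectors == 0);

	/*
	 * mirrors the alloc_foreground.h hunk: a bucket with only a
	 * partial block left can no longer service a write, so retire it
	 */
	unsigned ob_sectors_free = 5;
	int retire = ob_sectors_free < block_sectors;

	printf("usable: %u sectors, retire bucket: %d\n", sectors_free, retire);
	return 0;
}
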
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index a3db328dee31..d6e4a496f02b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k)
#define __BKEY_PADDED(key, pad) \
struct bkey_i key; __u64 key ## _pad[pad]
+enum bch_bkey_type_flags {
+ BKEY_TYPE_strict_btree_checks = BIT(0),
+};
+
/*
* - DELETED keys are used internally to mark keys that should be ignored but
* override keys in composition order. Their version number is ignored.
@@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k)
*
* - WHITEOUT: for hash table btrees
*/
-#define BCH_BKEY_TYPES() \
- x(deleted, 0) \
- x(whiteout, 1) \
- x(error, 2) \
- x(cookie, 3) \
- x(hash_whiteout, 4) \
- x(btree_ptr, 5) \
- x(extent, 6) \
- x(reservation, 7) \
- x(inode, 8) \
- x(inode_generation, 9) \
- x(dirent, 10) \
- x(xattr, 11) \
- x(alloc, 12) \
- x(quota, 13) \
- x(stripe, 14) \
- x(reflink_p, 15) \
- x(reflink_v, 16) \
- x(inline_data, 17) \
- x(btree_ptr_v2, 18) \
- x(indirect_inline_data, 19) \
- x(alloc_v2, 20) \
- x(subvolume, 21) \
- x(snapshot, 22) \
- x(inode_v2, 23) \
- x(alloc_v3, 24) \
- x(set, 25) \
- x(lru, 26) \
- x(alloc_v4, 27) \
- x(backpointer, 28) \
- x(inode_v3, 29) \
- x(bucket_gens, 30) \
- x(snapshot_tree, 31) \
- x(logged_op_truncate, 32) \
- x(logged_op_finsert, 33) \
- x(accounting, 34) \
- x(inode_alloc_cursor, 35)
+#define BCH_BKEY_TYPES() \
+ x(deleted, 0, 0) \
+ x(whiteout, 1, 0) \
+ x(error, 2, 0) \
+ x(cookie, 3, 0) \
+ x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \
+ x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \
+ x(extent, 6, BKEY_TYPE_strict_btree_checks) \
+ x(reservation, 7, BKEY_TYPE_strict_btree_checks) \
+ x(inode, 8, BKEY_TYPE_strict_btree_checks) \
+ x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \
+ x(dirent, 10, BKEY_TYPE_strict_btree_checks) \
+ x(xattr, 11, BKEY_TYPE_strict_btree_checks) \
+ x(alloc, 12, BKEY_TYPE_strict_btree_checks) \
+ x(quota, 13, BKEY_TYPE_strict_btree_checks) \
+ x(stripe, 14, BKEY_TYPE_strict_btree_checks) \
+ x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \
+ x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \
+ x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \
+ x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \
+ x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \
+ x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \
+ x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \
+ x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \
+ x(set, 25, 0) \
+ x(lru, 26, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \
+ x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \
+ x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \
+ x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \
+ x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \
+ x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \
+ x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \
+ x(accounting, 34, BKEY_TYPE_strict_btree_checks) \
+ x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks)
enum bch_bkey_type {
-#define x(name, nr) KEY_TYPE_##name = nr,
+#define x(name, nr, ...) KEY_TYPE_##name = nr,
BCH_BKEY_TYPES()
#undef x
KEY_TYPE_MAX,
@@ -863,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
+LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 15c93576b5c2..00d05ccfaf73 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -21,7 +21,7 @@
#include "xattr.h"
const char * const bch2_bkey_types[] = {
-#define x(name, nr) #name,
+#define x(name, nr, ...) #name,
BCH_BKEY_TYPES()
#undef x
NULL
@@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
})
const struct bkey_ops bch2_bkey_ops[] = {
-#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
+#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
BCH_BKEY_TYPES()
#undef x
};
@@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = {
#undef x
};
+static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = {
+#define x(name, nr, flags) [KEY_TYPE_##name] = flags,
+ BCH_BKEY_TYPES()
+#undef x
+};
+
const char *bch2_btree_node_type_str(enum btree_node_type type)
{
return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
@@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
- (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
+ enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_type_flags[k.k->type]
+ : 0;
+
+ bool strict_key_type_allowed =
+ (from.flags & BCH_VALIDATE_commit) ||
+ type == BKEY_TYPE_btree ||
+ (from.btree < BTREE_ID_NR &&
+ (bkey_flags & BKEY_TYPE_strict_btree_checks));
+
+ bkey_fsck_err_on(strict_key_type_allowed &&
+ k.k->type < KEY_TYPE_MAX &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
c, bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index e34e9598ef25..59fa527ac685 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
- !bkey_eq(iter->pos, POS_MAX)) {
+ !bkey_eq(iter->pos, POS_MAX) &&
+ !((iter->flags & BTREE_ITER_is_extents) &&
+ iter->pos.offset == U64_MAX)) {
+
/*
* bkey_start_pos(), for extents, is not monotonically
* increasing until after filtering for snapshots:
@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify_entry_exit(iter);
- EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);
int ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 8488a7578115..92ee59d9e00e 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -13,8 +13,8 @@
#include <linux/dcache.h>
-static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
+int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
#endif
}
-static inline int bch2_maybe_casefold(struct btree_trans *trans,
- const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
-{
- if (likely(!info->cf_encoding)) {
- *out_cf = *str;
- return 0;
- } else {
- return bch2_casefold(trans, info, str, out_cf);
- }
-}
-
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 0880772b80a9..9838a7ba7ed1 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -23,6 +23,21 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;
+int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
+ const struct qstr *, struct qstr *);
+
+static inline int bch2_maybe_casefold(struct btree_trans *trans,
+ const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
+{
+ if (likely(!info->cf_encoding)) {
+ *out_cf = *str;
+ return 0;
+ } else {
+ return bch2_casefold(trans, info, str, out_cf);
+ }
+}
+
struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index baf5dfb32298..925b0b54ea2f 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
{
struct fsck_err_state *s;
- if (!test_bit(BCH_FS_fsck_running, &c->flags))
- return NULL;
-
list_for_each_entry(s, &c->fsck_error_msgs, list)
if (s->id == id) {
/*
@@ -639,14 +636,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
return ret;
}
-void bch2_flush_fsck_errs(struct bch_fs *c)
+static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
{
struct fsck_err_state *s, *n;
mutex_lock(&c->fsck_error_msgs_lock);
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
- if (s->ratelimited && s->last_msg)
+ if (print && s->ratelimited && s->last_msg)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
list_del(&s->list);
@@ -657,6 +654,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
mutex_unlock(&c->fsck_error_msgs_lock);
}
+void bch2_flush_fsck_errs(struct bch_fs *c)
+{
+ __bch2_flush_fsck_errs(c, true);
+}
+
+void bch2_free_fsck_errs(struct bch_fs *c)
+{
+ __bch2_flush_fsck_errs(c, false);
+}
+
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
subvol_inum inum, u64 offset)
{
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index d0d024dc714b..4a364fd44abe 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
_flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__)
void bch2_flush_fsck_errs(struct bch_fs *);
+void bch2_free_fsck_errs(struct bch_fs *);
#define fsck_err_wrap(_do) \
({ \
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 14886e1d4d6d..a82dfce9e4ad 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -21,206 +21,6 @@
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-struct flags_set {
- unsigned mask;
- unsigned flags;
-
- unsigned projid;
-
- bool set_projinherit;
- bool projinherit;
-};
-
-static int bch2_inode_flags_set(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi,
- void *p)
-{
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- /*
- * We're relying on btree locking here for exclusion with other ioctl
- * calls - use the flags in the btree (@bi), not inode->i_flags:
- */
- struct flags_set *s = p;
- unsigned newflags = s->flags;
- unsigned oldflags = bi->bi_flags & s->mask;
-
- if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
- !capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
- if (!S_ISREG(bi->bi_mode) &&
- !S_ISDIR(bi->bi_mode) &&
- (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
- return -EINVAL;
-
- if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
-#ifdef CONFIG_UNICODE
- int ret = 0;
- /* Not supported on individual files. */
- if (!S_ISDIR(bi->bi_mode))
- return -EOPNOTSUPP;
-
- /*
- * Make sure the dir is empty, as otherwise we'd need to
- * rehash everything and update the dirent keys.
- */
- ret = bch2_empty_dir_trans(trans, inode_inum(inode));
- if (ret < 0)
- return ret;
-
- ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
- if (ret)
- return ret;
-
- bch2_check_set_feature(c, BCH_FEATURE_casefolding);
-#else
- printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
- return -EOPNOTSUPP;
-#endif
- }
-
- if (s->set_projinherit) {
- bi->bi_fields_set &= ~(1 << Inode_opt_project);
- bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
- }
-
- bi->bi_flags &= ~s->mask;
- bi->bi_flags |= newflags;
-
- bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
- return 0;
-}
-
-static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
-{
- unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
-
- return put_user(flags, arg);
-}
-
-static int bch2_ioc_setflags(struct bch_fs *c,
- struct file *file,
- struct bch_inode_info *inode,
- void __user *arg)
-{
- struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
- unsigned uflags;
- int ret;
-
- if (get_user(uflags, (int __user *) arg))
- return -EFAULT;
-
- s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
- if (uflags)
- return -EOPNOTSUPP;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- inode_lock(&inode->v);
- if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
- ret = -EACCES;
- goto setflags_out;
- }
-
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
- bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
- ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
-
-setflags_out:
- inode_unlock(&inode->v);
- mnt_drop_write_file(file);
- return ret;
-}
-
-static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
- struct fsxattr __user *arg)
-{
- struct fsxattr fa = { 0 };
-
- fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
-
- if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
- fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
-
- fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
-
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -EFAULT;
-
- return 0;
-}
-
-static int fssetxattr_inode_update_fn(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi,
- void *p)
-{
- struct flags_set *s = p;
-
- if (s->projid != bi->bi_project) {
- bi->bi_fields_set |= 1U << Inode_opt_project;
- bi->bi_project = s->projid;
- }
-
- return bch2_inode_flags_set(trans, inode, bi, p);
-}
-
-static int bch2_ioc_fssetxattr(struct bch_fs *c,
- struct file *file,
- struct bch_inode_info *inode,
- struct fsxattr __user *arg)
-{
- struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
- struct fsxattr fa;
- int ret;
-
- if (copy_from_user(&fa, arg, sizeof(fa)))
- return -EFAULT;
-
- s.set_projinherit = true;
- s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
- fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
-
- s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
- if (fa.fsx_xflags)
- return -EOPNOTSUPP;
-
- if (fa.fsx_projid >= U32_MAX)
- return -EINVAL;
-
- /*
- * inode fields accessible via the xattr interface are stored with a +1
- * bias, so that 0 means unset:
- */
- s.projid = fa.fsx_projid + 1;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- inode_lock(&inode->v);
- if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
- ret = -EACCES;
- goto err;
- }
-
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
- bch2_set_projid(c, inode, fa.fsx_projid) ?:
- bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
- ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
-err:
- inode_unlock(&inode->v);
- mnt_drop_write_file(file);
- return ret;
-}
-
static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
long ret;
switch (cmd) {
- case FS_IOC_GETFLAGS:
- ret = bch2_ioc_getflags(inode, (int __user *) arg);
- break;
-
- case FS_IOC_SETFLAGS:
- ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
- break;
-
- case FS_IOC_FSGETXATTR:
- ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
- break;
-
- case FS_IOC_FSSETXATTR:
- ret = bch2_ioc_fssetxattr(c, file, inode,
- (void __user *) arg);
- break;
-
case BCHFS_IOC_REINHERIT_ATTRS:
ret = bch2_ioc_reinherit_attrs(c, file, inode,
(void __user *) arg);
diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h
index ecd3bfdcde21..a657e4994b71 100644
--- a/fs/bcachefs/fs-ioctl.h
+++ b/fs/bcachefs/fs-ioctl.h
@@ -2,81 +2,6 @@
#ifndef _BCACHEFS_FS_IOCTL_H
#define _BCACHEFS_FS_IOCTL_H
-/* Inode flags: */
-
-/* bcachefs inode flags -> vfs inode flags: */
-static const __maybe_unused unsigned bch_flags_to_vfs[] = {
- [__BCH_INODE_sync] = S_SYNC,
- [__BCH_INODE_immutable] = S_IMMUTABLE,
- [__BCH_INODE_append] = S_APPEND,
- [__BCH_INODE_noatime] = S_NOATIME,
- [__BCH_INODE_casefolded] = S_CASEFOLD,
-};
-
-/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-static const __maybe_unused unsigned bch_flags_to_uflags[] = {
- [__BCH_INODE_sync] = FS_SYNC_FL,
- [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
- [__BCH_INODE_append] = FS_APPEND_FL,
- [__BCH_INODE_nodump] = FS_NODUMP_FL,
- [__BCH_INODE_noatime] = FS_NOATIME_FL,
- [__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
-};
-
-/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-static const __maybe_unused unsigned bch_flags_to_xflags[] = {
- [__BCH_INODE_sync] = FS_XFLAG_SYNC,
- [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
- [__BCH_INODE_append] = FS_XFLAG_APPEND,
- [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
- [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
- //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
-};
-
-#define set_flags(_map, _in, _out) \
-do { \
- unsigned _i; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & (1 << _i)) \
- (_out) |= _map[_i]; \
- else \
- (_out) &= ~_map[_i]; \
-} while (0)
-
-#define map_flags(_map, _in) \
-({ \
- unsigned _out = 0; \
- \
- set_flags(_map, _in, _out); \
- _out; \
-})
-
-#define map_flags_rev(_map, _in) \
-({ \
- unsigned _i, _out = 0; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & _map[_i]) { \
- (_out) |= 1 << _i; \
- (_in) &= ~_map[_i]; \
- } \
- (_out); \
-})
-
-#define map_defined(_map) \
-({ \
- unsigned _in = ~0; \
- \
- map_flags_rev(_map, _in); \
-})
-
-/* Set VFS inode flags from bcachefs inode: */
-static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
-{
- set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
-}
-
long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 5a41b1a8e54f..0f1d61aab90b 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -33,6 +33,7 @@
#include <linux/backing-dev.h>
#include <linux/exportfs.h>
#include <linux/fiemap.h>
+#include <linux/fileattr.h>
#include <linux/fs_context.h>
#include <linux/module.h>
#include <linux/pagemap.h>
@@ -51,6 +52,22 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
struct bch_subvolume *);
+/* Set VFS inode flags from bcachefs inode: */
+static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode)
+{
+ static const __maybe_unused unsigned bch_flags_to_vfs[] = {
+ [__BCH_INODE_sync] = S_SYNC,
+ [__BCH_INODE_immutable] = S_IMMUTABLE,
+ [__BCH_INODE_append] = S_APPEND,
+ [__BCH_INODE_noatime] = S_NOATIME,
+ };
+
+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
+
+ if (bch2_inode_casefold(c, &inode->ei_inode))
+ inode->v.i_flags |= S_CASEFOLD;
+}
+
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@@ -79,7 +96,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
inode->ei_inode = *bi;
- bch2_inode_flags_to_vfs(inode);
+ bch2_inode_flags_to_vfs(c, inode);
}
int __must_check bch2_write_inode(struct bch_fs *c,
@@ -631,13 +648,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
const struct qstr *name)
{
struct bch_fs *c = trans->c;
- struct btree_iter dirent_iter = {};
subvol_inum inum = {};
struct printbuf buf = PRINTBUF;
+ struct qstr lookup_name;
+ int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
+ if (ret)
+ return ERR_PTR(ret);
+
+ struct btree_iter dirent_iter = {};
struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
- dir_hash_info, dir, name, 0);
- int ret = bkey_err(k);
+ dir_hash_info, dir, &lookup_name, 0);
+ ret = bkey_err(k);
if (ret)
return ERR_PTR(ret);
@@ -825,6 +847,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
*/
set_nlink(&inode->v, 0);
}
+
+ if (IS_CASEFOLDED(vdir)) {
+ d_invalidate(dentry);
+ d_prune_aliases(&inode->v);
+ }
err:
bch2_trans_put(trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
@@ -1235,10 +1262,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap,
return finish_open_simple(file, 0);
}
+struct bch_fiemap_extent {
+ struct bkey_buf kbuf;
+ unsigned flags;
+};
+
static int bch2_fill_extent(struct bch_fs *c,
struct fiemap_extent_info *info,
- struct bkey_s_c k, unsigned flags)
+ struct bch_fiemap_extent *fe)
{
+ struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k);
+ unsigned flags = fe->flags;
+
+ BUG_ON(!k.k->size);
+
if (bkey_extent_is_direct_data(k.k)) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1291,110 +1328,223 @@ static int bch2_fill_extent(struct bch_fs *c,
}
}
-static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
- u64 start, u64 len)
+/*
+ * Scan a range of an inode for data in pagecache.
+ *
+ * Intended to be retryable, so don't modify the output params until success is
+ * imminent.
+ */
+static int
+bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
+ bool nonblock)
{
- struct bch_fs *c = vinode->i_sb->s_fs_info;
- struct bch_inode_info *ei = to_bch_ei(vinode);
- struct btree_trans *trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_buf cur, prev;
- bool have_extent = false;
- int ret = 0;
+ loff_t dstart, dend;
- ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
- if (ret)
+ dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
+ if (dstart < 0)
+ return dstart;
+
+ if (dstart == *end) {
+ *start = dstart;
+ return 0;
+ }
+
+ dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
+ if (dend < 0)
+ return dend;
+
+ /* race */
+ BUG_ON(dstart == dend);
+
+ *start = dstart;
+ *end = dend;
+ return 0;
+}
+
+/*
+ * Scan a range of pagecache that corresponds to a file mapping hole in the
+ * extent btree. If data is found, fake up an extent key so it looks like a
+ * delalloc extent to the rest of the fiemap processing code.
+ */
+static int
+bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
+ u64 start, u64 end, struct bch_fiemap_extent *cur)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i_extent *delextent;
+ struct bch_extent_ptr ptr = {};
+ loff_t dstart = start << 9, dend = end << 9;
+ int ret;
+
+ /*
+ * We hold btree locks here so we cannot block on folio locks without
+ * dropping trans locks first. Run a nonblocking scan for the common
+ * case of no folios over holes and fall back on failure.
+ *
+ * Note that dropping locks like this is technically racy against
+ * writeback inserting to the extent tree, but a non-sync fiemap scan is
+ * fundamentally racy with writeback anyways. Therefore, just report the
+ * range as delalloc regardless of whether we have to cycle trans locks.
+ */
+ ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
+ if (ret == -EAGAIN)
+ ret = drop_locks_do(trans,
+ bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
+ if (ret < 0)
return ret;
- struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
- if (start + len < start)
- return -EINVAL;
+ /*
+ * Create a fake extent key in the buffer. We have to add a dummy extent
+ * pointer for the fill code to add an extent entry. It's explicitly
+ * zeroed to reflect delayed allocation (i.e. phys offset 0).
+ */
+ bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64));
+ delextent = bkey_extent_init(cur->kbuf.k);
+ delextent->k.p = POS(inode->ei_inum.inum, dend >> 9);
+ delextent->k.size = (dend - dstart) >> 9;
+ bch2_bkey_append_ptr(&delextent->k_i, ptr);
- start >>= 9;
+ cur->flags = FIEMAP_EXTENT_DELALLOC;
- bch2_bkey_buf_init(&cur);
- bch2_bkey_buf_init(&prev);
- trans = bch2_trans_get(c);
+ return 0;
+}
+
+static int bch2_next_fiemap_extent(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ u64 start, u64 end,
+ struct bch_fiemap_extent *cur)
+{
+ u32 snapshot;
+ int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+ struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
- POS(ei->v.i_ino, start), 0);
+ SPOS(inode->ei_inum.inum, start, snapshot), 0);
- while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- enum btree_id data_btree = BTREE_ID_extents;
+ struct bkey_s_c k =
+ bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end));
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
- bch2_trans_begin(trans);
+ ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
+ if (ret)
+ goto err;
- u32 snapshot;
- ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
- if (ret)
- continue;
+ struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k);
- bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
+ /*
+ * Does the pagecache or the btree take precedence?
+ *
+ * It _should_ be the pagecache, so that we correctly report delalloc
+ * extents when dirty in the pagecache (we're COW, after all).
+ *
+ * But we'd have to add per-sector writeback tracking to
+ * bch_folio_state, otherwise we report delalloc extents for clean
+ * cached data in the pagecache.
+ *
+ * We should do this, but even then fiemap won't report stable mappings:
+ * on bcachefs data moves around in the background (copygc, rebalance)
+ * and we don't provide a way for userspace to lock that out.
+ */
+ if (k.k &&
+ bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)),
+ pagecache_start)) {
+ bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k);
+ bch2_cut_front(iter.pos, cur->kbuf.k);
+ bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k);
+ cur->flags = 0;
+ } else if (k.k) {
+ bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k);
+ }
- k = bch2_btree_iter_peek_max(trans, &iter, end);
- ret = bkey_err(k);
+ if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) {
+ unsigned sectors = cur->kbuf.k->k.size;
+ s64 offset_into_extent = 0;
+ enum btree_id data_btree = BTREE_ID_extents;
+ int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent,
+ &cur->kbuf);
if (ret)
- continue;
+ goto err;
- if (!k.k)
- break;
+ struct bkey_i *k = cur->kbuf.k;
+ sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent);
- if (!bkey_extent_is_data(k.k) &&
- k.k->type != KEY_TYPE_reservation) {
- bch2_btree_iter_advance(trans, &iter);
- continue;
- }
+ bch2_cut_front(POS(k->k.p.inode,
+ bkey_start_offset(&k->k) + offset_into_extent),
+ k);
+ bch2_key_resize(&k->k, sectors);
+ k->k.p = iter.pos;
+ k->k.p.offset += k->k.size;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
- s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
- unsigned sectors = k.k->size - offset_into_extent;
+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
+ u64 start, u64 len)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(vinode);
+ struct btree_trans *trans;
+ struct bch_fiemap_extent cur, prev;
+ int ret = 0;
+
+ ret = fiemap_prep(&ei->v, info, start, &len, 0);
+ if (ret)
+ return ret;
+
+ if (start + len < start)
+ return -EINVAL;
+
+ start >>= 9;
+ u64 end = (start + len) >> 9;
- bch2_bkey_buf_reassemble(&cur, c, k);
+ bch2_bkey_buf_init(&cur.kbuf);
+ bch2_bkey_buf_init(&prev.kbuf);
+ bkey_init(&prev.kbuf.k->k);
- ret = bch2_read_indirect_extent(trans, &data_btree,
- &offset_into_extent, &cur);
+ trans = bch2_trans_get(c);
+
+ while (start < end) {
+ ret = lockrestart_do(trans,
+ bch2_next_fiemap_extent(trans, ei, start, end, &cur));
if (ret)
- continue;
+ goto err;
- k = bkey_i_to_s_c(cur.k);
- bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
+ BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start);
+ BUG_ON(cur.kbuf.k->k.p.offset > end);
- sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
+ if (bkey_start_offset(&cur.kbuf.k->k) == end)
+ break;
- bch2_cut_front(POS(k.k->p.inode,
- bkey_start_offset(k.k) +
- offset_into_extent),
- cur.k);
- bch2_key_resize(&cur.k->k, sectors);
- cur.k->k.p = iter.pos;
- cur.k->k.p.offset += cur.k->k.size;
+ start = cur.kbuf.k->k.p.offset;
- if (have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info,
- bkey_i_to_s_c(prev.k), 0);
+ ret = bch2_fill_extent(c, info, &prev);
if (ret)
- break;
+ goto err;
}
- bkey_copy(prev.k, cur.k);
- have_extent = true;
-
- bch2_btree_iter_set_pos(trans, &iter,
- POS(iter.pos.inode, iter.pos.offset + sectors));
+ bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k);
+ prev.flags = cur.flags;
}
- bch2_trans_iter_exit(trans, &iter);
- if (!ret && have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
- FIEMAP_EXTENT_LAST);
+ prev.flags |= FIEMAP_EXTENT_LAST;
+ ret = bch2_fill_extent(c, info, &prev);
}
-
+err:
bch2_trans_put(trans);
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret < 0 ? ret : 0;
+ bch2_bkey_buf_exit(&cur.kbuf, c);
+ bch2_bkey_buf_exit(&prev.kbuf, c);
+
+ return bch2_err_class(ret < 0 ? ret : 0);
}
static const struct vm_operations_struct bch_vm_ops = {
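
Note: the rewritten bch2_fiemap() above follows a report-the-previous-extent loop: each iteration fetches the next extent (real, from the btree, or synthesized delalloc, from the pagecache) but only emits the one before it, so the final extent can be flagged FIEMAP_EXTENT_LAST. A condensed sketch of that control flow; next_extent() and fill_extent() are invented stand-ins for the bcachefs calls:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIEMAP_EXTENT_LAST 0x1

struct extent { uint64_t start, end; unsigned flags; bool valid; };

/* stand-in: would come from the extent btree or a pagecache scan */
static struct extent next_extent(uint64_t start, uint64_t end)
{
	if (start >= end)
		return (struct extent) { .valid = false };
	uint64_t e = start + 16 < end ? start + 16 : end;
	return (struct extent) { start, e, 0, true };
}

static void fill_extent(const struct extent *e)
{
	printf("extent %llu-%llu flags %x\n",
	       (unsigned long long) e->start,
	       (unsigned long long) e->end, e->flags);
}

int main(void)
{
	uint64_t start = 0, end = 40;
	struct extent prev = { .valid = false };

	while (start < end) {
		struct extent cur = next_extent(start, end);
		if (!cur.valid)
			break;
		start = cur.end;

		if (prev.valid)
			fill_extent(&prev);	/* report the lagging extent */
		prev = cur;
	}
	if (prev.valid) {
		prev.flags |= FIEMAP_EXTENT_LAST;	/* only known at the end */
		fill_extent(&prev);
	}
	return 0;
}
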
@@ -1449,6 +1599,165 @@ static int bch2_open(struct inode *vinode, struct file *file)
return generic_file_open(vinode, file);
}
+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
+static const __maybe_unused unsigned bch_flags_to_uflags[] = {
+ [__BCH_INODE_sync] = FS_SYNC_FL,
+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+ [__BCH_INODE_append] = FS_APPEND_FL,
+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
+};
+
+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
+static const __maybe_unused unsigned bch_flags_to_xflags[] = {
+ [__BCH_INODE_sync] = FS_XFLAG_SYNC,
+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
+ [__BCH_INODE_append] = FS_XFLAG_APPEND,
+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
+};
+
+static int bch2_fileattr_get(struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
+
+ if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+ fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
+ if (bch2_inode_casefold(c, &inode->ei_inode))
+ fa->flags |= FS_CASEFOLD_FL;
+
+ fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
+ return 0;
+}
+
+struct flags_set {
+ unsigned mask;
+ unsigned flags;
+ unsigned projid;
+ bool set_project;
+ bool set_casefold;
+ bool casefold;
+};
+
+static int fssetxattr_inode_update_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_fs *c = trans->c;
+ struct flags_set *s = p;
+
+ /*
+ * We're relying on btree locking here for exclusion with other ioctl
+ * calls - use the flags in the btree (@bi), not inode->i_flags:
+ */
+ if (!S_ISREG(bi->bi_mode) &&
+ !S_ISDIR(bi->bi_mode) &&
+ (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
+ return -EINVAL;
+
+ if (s->casefold != bch2_inode_casefold(c, bi)) {
+#ifdef CONFIG_UNICODE
+ int ret = 0;
+ /* Not supported on individual files. */
+ if (!S_ISDIR(bi->bi_mode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Make sure the dir is empty, as otherwise we'd need to
+ * rehash everything and update the dirent keys.
+ */
+ ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+ if (ret < 0)
+ return ret;
+
+ ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+ if (ret)
+ return ret;
+
+ bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+
+ bi->bi_casefold = s->casefold + 1;
+ bi->bi_fields_set |= BIT(Inode_opt_casefold);
+
+#else
+ printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ if (s->set_project) {
+ bi->bi_project = s->projid;
+ bi->bi_fields_set |= BIT(Inode_opt_project);
+ }
+
+ bi->bi_flags &= ~s->mask;
+ bi->bi_flags |= s->flags;
+
+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
+ return 0;
+}
+
+static int bch2_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct flags_set s = {};
+ int ret;
+
+ if (fa->fsx_valid) {
+ fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
+
+ s.mask = map_defined(bch_flags_to_xflags);
+ s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags);
+ if (fa->fsx_xflags)
+ return -EOPNOTSUPP;
+
+ if (fa->fsx_projid >= U32_MAX)
+ return -EINVAL;
+
+ /*
+ * inode fields accessible via the xattr interface are stored with a +1
+ * bias, so that 0 means unset:
+ */
+ if ((inode->ei_inode.bi_project ||
+ fa->fsx_projid) &&
+ inode->ei_inode.bi_project != fa->fsx_projid + 1) {
+ s.projid = fa->fsx_projid + 1;
+ s.set_project = true;
+ }
+ }
+
+ if (fa->flags_valid) {
+ s.mask = map_defined(bch_flags_to_uflags);
+
+ s.set_casefold = true;
+ s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0;
+ fa->flags &= ~FS_CASEFOLD_FL;
+
+ s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags);
+ if (fa->flags)
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
+ (s.set_project
+ ? bch2_set_projid(c, inode, fa->fsx_projid)
+ : 0) ?:
+ bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+ ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+ return ret;
+}
+
static const struct file_operations bch_file_operations = {
.open = bch2_open,
.llseek = bch2_llseek,
@@ -1476,6 +1785,8 @@ static const struct inode_operations bch_file_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_dir_inode_operations = {
@@ -1496,6 +1807,8 @@ static const struct inode_operations bch_dir_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct file_operations bch_dir_file_operations = {
@@ -1518,6 +1831,8 @@ static const struct inode_operations bch_symlink_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_special_inode_operations = {
@@ -1528,6 +1843,8 @@ static const struct inode_operations bch_special_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct address_space_operations bch_address_space_operations = {
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index f82cfbf460d0..c74af15b14f2 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
}
}
+static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+	/* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */
+ return bi->bi_casefold
+ ? bi->bi_casefold - 1
+ : c->opts.casefold;
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
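
Note: the per-inode casefold option uses the +1-biased encoding described in the comment above: 0 means "unset, fall back to the filesystem-wide option", 1 means explicitly off, 2 means explicitly on (this matches bi->bi_casefold = s->casefold + 1 in the fs.c hunk). A tiny standalone model of that encoding; fs_default stands in for c->opts.casefold:

#include <assert.h>
#include <stdbool.h>

/* biased field: 0 = unset, n + 1 = explicit value n */
static bool inode_casefold(unsigned bi_casefold, bool fs_default)
{
	return bi_casefold ? bi_casefold - 1 : fs_default;
}

static unsigned set_casefold(bool value)
{
	return value + 1;	/* store with +1 bias so 0 stays "unset" */
}

int main(void)
{
	assert(inode_casefold(0, true)  == true);	/* unset -> fs opt */
	assert(inode_casefold(0, false) == false);
	assert(inode_casefold(set_casefold(false), true)  == false);
	assert(inode_casefold(set_casefold(true),  false) == true);
	return 0;
}
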
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 117110af1e3f..87e193e8ed25 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -103,7 +103,8 @@ struct bch_inode_generation {
x(bi_parent_subvol, 32) \
x(bi_nocow, 8) \
x(bi_depth, 32) \
- x(bi_inodes_32bit, 8)
+ x(bi_inodes_32bit, 8) \
+ x(bi_casefold, 8)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
@@ -117,7 +118,8 @@ struct bch_inode_generation {
x(background_target, 16) \
x(erasure_code, 16) \
x(nocow, 8) \
- x(inodes_32bit, 8)
+ x(inodes_32bit, 8) \
+ x(casefold, 8)
enum inode_opt_id {
#define x(name, ...) \
@@ -137,8 +139,7 @@ enum inode_opt_id {
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
x(backptr_untrusted, 8) \
- x(has_child_snapshot, 9) \
- x(casefolded, 10)
+ x(has_child_snapshot, 9)
/* bits 20+ reserved for packed fields below: */
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index d8f74b6d0a75..bb45d3634194 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
buf->u64s_reserved) << c->block_bits;
- BUG_ON(sectors > buf->sectors);
+ if (unlikely(sectors > buf->sectors)) {
+ struct printbuf err = PRINTBUF;
+ err.atomic++;
+
+ prt_printf(&err, "journal entry overran reserved space: %u > %u\n",
+ sectors, buf->sectors);
+ prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n",
+ le32_to_cpu(buf->data->u64s), buf->u64s_reserved,
+ j->cur_entry_u64s,
+ c->block_bits);
+ prt_printf(&err, "fatal error - emergency read only");
+ bch2_journal_halt_locked(j);
+
+ bch_err(c, "%s", err.buf);
+ printbuf_exit(&err);
+ return;
+ }
+
buf->sectors = sectors;
/*
@@ -1462,8 +1479,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->last_empty_seq = cur_seq - 1; /* to match j->seq */
spin_lock(&j->lock);
-
- set_bit(JOURNAL_running, &j->flags);
j->last_flush_write = jiffies;
j->reservations.idx = journal_cur_seq(j);
@@ -1474,6 +1489,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
return 0;
}
+void bch2_journal_set_replay_done(struct journal *j)
+{
+ /*
+ * journal_space_available must happen before setting JOURNAL_running
+ * JOURNAL_running must happen before JOURNAL_replay_done
+ */
+ spin_lock(&j->lock);
+ bch2_journal_space_available(j);
+
+ set_bit(JOURNAL_need_flush_write, &j->flags);
+ set_bit(JOURNAL_running, &j->flags);
+ set_bit(JOURNAL_replay_done, &j->flags);
+ spin_unlock(&j->lock);
+}
+
/* init/exit: */
void bch2_dev_journal_exit(struct bch_dev *ca)
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 47828771f9c2..641e20c05a14 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j)
struct bch_dev;
-static inline void bch2_journal_set_replay_done(struct journal *j)
-{
- BUG_ON(!test_bit(JOURNAL_running, &j->flags));
- set_bit(JOURNAL_replay_done, &j->flags);
-}
-
void bch2_journal_unblock(struct journal *);
void bch2_journal_block(struct journal *);
struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *);
@@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
int bch2_fs_journal_start(struct journal *, u64);
+void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 5d1547aa118a..ea670c3c43d8 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j)
bch2_journal_set_watermark(j);
out:
- j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
+ j->cur_entry_sectors = !ret
+ ? round_down(j->space[journal_space_discarded].next_entry,
+ block_sectors(c))
+ : 0;
j->cur_entry_error = ret;
if (!ret)
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 159410c50861..96873372b516 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ */
+ kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_should_stop());
+
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h
index ea181fef5bc9..d1885cf67a45 100644
--- a/fs/bcachefs/movinggc.h
+++ b/fs/bcachefs/movinggc.h
@@ -5,6 +5,15 @@
unsigned long bch2_copygc_wait_amount(struct bch_fs *);
void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
+static inline void bch2_copygc_wakeup(struct bch_fs *c)
+{
+ rcu_read_lock();
+ struct task_struct *p = rcu_dereference(c->copygc_thread);
+ if (p)
+ wake_up_process(p);
+ rcu_read_unlock();
+}
+
void bch2_copygc_stop(struct bch_fs *);
int bch2_copygc_start(struct bch_fs *);
void bch2_fs_copygc_init(struct bch_fs *);
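
Note: bch2_copygc_wakeup() uses the usual pattern for waking a kthread that may be exiting: the task pointer is read under rcu_read_lock(), and the stop path clears the pointer and waits for readers before kthread_stop() (the rebalance equivalent's synchronize_rcu() appears in the rebalance.c hunk below). A userspace sketch of just the pointer-publication half, using C11 atomics; RCU additionally guarantees the task cannot be freed while a reader holds the pointer, which plain atomics alone do not:

#include <stdatomic.h>
#include <stdio.h>

struct task { const char *name; };

static _Atomic(struct task *) copygc_thread;

static void wakeup(void)
{
	/* analogue of rcu_dereference() + wake_up_process() */
	struct task *p = atomic_load_explicit(&copygc_thread, memory_order_acquire);
	if (p)
		printf("waking %s\n", p->name);
}

int main(void)
{
	static struct task t = { "copygc" };

	atomic_store_explicit(&copygc_thread, &t, memory_order_release);
	wakeup();

	/*
	 * stop path: unpublish, then (in the kernel) synchronize_rcu()
	 * before kthread_stop() so no waker still holds the pointer
	 */
	atomic_store_explicit(&copygc_thread, NULL, memory_order_release);
	wakeup();	/* sees NULL, does nothing */
	return 0;
}
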
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 0d65ea96f7a2..46f3c8b100a9 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
- /* Inherit casefold state from parent. */
- if (S_ISDIR(mode))
- new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
-
if (!(flags & BCH_CREATE_SNAPSHOT)) {
/* Normal create path - allocate a new inode: */
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 4d06313076ff..dfb14810124c 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -228,6 +228,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_ERASURE_CODE, false, \
NULL, "Enable erasure coding (DO NOT USE YET)") \
+ x(casefold, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT, \
+ OPT_BOOL(), \
+ BCH_SB_CASEFOLD, false, \
+ NULL, "Dirent lookups are casefolded") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index c63fa53f30d2..4ccdfc1f34aa 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -262,7 +262,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
int ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
bch2_set_rebalance_needs_scan_trans(trans, inum));
- rebalance_wakeup(c);
+ bch2_rebalance_wakeup(c);
return ret;
}
@@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg)
set_freezable();
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ */
+ kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_should_stop());
+
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->rebalance_write_point),
true);
@@ -664,7 +671,7 @@ void bch2_rebalance_stop(struct bch_fs *c)
c->rebalance.thread = NULL;
if (p) {
- /* for sychronizing with rebalance_wakeup() */
+	/* for synchronizing with bch2_rebalance_wakeup() */
synchronize_rcu();
kthread_stop(p);
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index 62a3859d3823..e5e8eb4a2dd1 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
int bch2_set_fs_needs_rebalance(struct bch_fs *);
-static inline void rebalance_wakeup(struct bch_fs *c)
+static inline void bch2_rebalance_wakeup(struct bch_fs *c)
{
struct task_struct *p;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 606d684e6f23..d6c4ef819d40 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -18,6 +18,7 @@
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
+#include "movinggc.h"
#include "namei.h"
#include "quota.h"
#include "rebalance.h"
@@ -1129,13 +1130,13 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- set_bit(BCH_FS_accounting_replay_done, &c->flags);
- bch2_journal_set_replay_done(&c->journal);
-
ret = bch2_fs_read_write_early(c);
if (ret)
goto err;
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
+ bch2_journal_set_replay_done(&c->journal);
+
for_each_member_device(c, ca) {
ret = bch2_dev_usage_init(ca, false);
if (ret) {
@@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c)
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
+ bch2_copygc_wakeup(c);
+ bch2_rebalance_wakeup(c);
+
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
if (ret)
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 593ff142530d..22f72bb5b853 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -12,6 +12,7 @@
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
+#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
@@ -262,49 +263,52 @@ int bch2_run_recovery_passes(struct bch_fs *c)
*/
c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
- c->next_recovery_pass = c->curr_recovery_pass + 1;
+ spin_lock_irq(&c->recovery_pass_lock);
- spin_lock_irq(&c->recovery_pass_lock);
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
+ unsigned prev_done = c->recovery_pass_done;
unsigned pass = c->curr_recovery_pass;
+ c->next_recovery_pass = pass + 1;
+
if (c->opts.recovery_pass_last &&
- c->curr_recovery_pass > c->opts.recovery_pass_last) {
- spin_unlock_irq(&c->recovery_pass_lock);
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
- }
- if (!should_run_recovery_pass(c, pass)) {
- c->curr_recovery_pass++;
- c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ if (should_run_recovery_pass(c, pass)) {
spin_unlock_irq(&c->recovery_pass_lock);
- continue;
- }
- spin_unlock_irq(&c->recovery_pass_lock);
-
- ret = bch2_run_recovery_pass(c, pass) ?:
- bch2_journal_flush(&c->journal);
-
- if (!ret && !test_bit(BCH_FS_error, &c->flags))
- bch2_clear_recovery_pass_required(c, pass);
-
- spin_lock_irq(&c->recovery_pass_lock);
- if (c->next_recovery_pass < c->curr_recovery_pass) {
- /*
- * bch2_run_explicit_recovery_pass() was called: we
- * can't always catch -BCH_ERR_restart_recovery because
- * it may have been called from another thread (btree
- * node read completion)
- */
- ret = 0;
- c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
- } else {
- c->recovery_passes_complete |= BIT_ULL(pass);
- c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ ret = bch2_run_recovery_pass(c, pass) ?:
+ bch2_journal_flush(&c->journal);
+
+ if (!ret && !test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, pass);
+ spin_lock_irq(&c->recovery_pass_lock);
+
+ if (c->next_recovery_pass < c->curr_recovery_pass) {
+ /*
+ * bch2_run_explicit_recovery_pass() was called: we
+ * can't always catch -BCH_ERR_restart_recovery because
+ * it may have been called from another thread (btree
+ * node read completion)
+ */
+ ret = 0;
+ c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
+ } else {
+ c->recovery_passes_complete |= BIT_ULL(pass);
+ c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ }
}
+
c->curr_recovery_pass = c->next_recovery_pass;
- spin_unlock_irq(&c->recovery_pass_lock);
+
+ if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
+ c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
+ bch2_copygc_wakeup(c);
+ bch2_rebalance_wakeup(c);
+ }
}
+ spin_unlock_irq(&c->recovery_pass_lock);
+
return ret;
}
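
Note: the restructured loop above keeps recovery_pass_lock held between iterations and drops it only around running a pass (which may sleep), so the rewind check (next_recovery_pass < curr_recovery_pass) and the pass bookkeeping are evaluated atomically with respect to bch2_run_explicit_recovery_pass(). The shape of that lock juggling, reduced to a runnable skeleton; run_pass() and N are placeholders, and the should_run filter is omitted:

#include <pthread.h>
#include <stdio.h>

#define N 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned curr, next;

static int run_pass(unsigned pass)	/* placeholder for the real pass */
{
	printf("running pass %u\n", pass);
	return 0;
}

int main(void)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	while (curr < N && !ret) {
		unsigned pass = curr;
		next = pass + 1;

		/* drop the lock only while the pass itself runs */
		pthread_mutex_unlock(&lock);
		ret = run_pass(pass);
		pthread_mutex_lock(&lock);

		/* another thread may have rewound next to re-run a pass */
		if (next < curr)
			ret = 0;

		curr = next;
	}
	pthread_mutex_unlock(&lock);
	return ret;
}
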
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index b7de29aed839..fec569c7deb1 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
u32 subvol = 0, s;
rcu_read_lock();
- while (id) {
+ while (id && bch2_snapshot_exists(c, id)) {
s = snapshot_t(c, id)->subvol;
if (s && (!subvol || s < subvol))
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 09a354a26c3b..0c1a00539bd1 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
struct bch_hash_info {
u8 type;
- struct unicode_map *cf_encoding;
+ struct unicode_map *cf_encoding;
/*
* For crc32 or crc64 string hashes the first key value of
* the siphash_key (k0) is used as the key.
@@ -44,11 +44,10 @@ struct bch_hash_info {
static inline struct bch_hash_info
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
- /* XXX ick */
struct bch_hash_info info = {
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
- .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
+ .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 25b6bce05c3c..cb5d960aed92 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1102,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c)
prt_str(&buf, ")");
bch2_fs_fatal_error(c, ": %s", buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_sb_not_downgraded;
+ ret = -BCH_ERR_sb_not_downgraded;
+ goto out;
}
darray_for_each(online_devices, ca) {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index e8a17ed1615d..e4ab0595c0ae 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
return ret;
}
-static int bch2_fs_read_write_late(struct bch_fs *c)
-{
- int ret;
-
- /*
- * Data move operations can't run until after check_snapshots has
- * completed, and bch2_snapshot_is_ancestor() is available.
- *
- * Ideally we'd start copygc/rebalance earlier instead of waiting for
- * all of recovery/fsck to complete:
- */
- ret = bch2_copygc_start(c);
- if (ret) {
- bch_err(c, "error starting copygc thread");
- return ret;
- }
-
- ret = bch2_rebalance_start(c);
- if (ret) {
- bch_err(c, "error starting rebalance thread");
- return ret;
- }
-
- return 0;
-}
-
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
int ret;
@@ -466,29 +440,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_clean_shutdown, &c->flags);
+ __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
+ bch2_dev_allocator_add(c, ca);
+ percpu_ref_reinit(&ca->io_ref[WRITE]);
+ }
+ bch2_recalc_capacity(c);
+
/*
* First journal write must be a flush write: after a clean shutdown we
* don't read the journal, so the first journal write may end up
* overwriting whatever was there previously, and there must always be
* at least one non-flush write in the journal or recovery will fail:
*/
+ spin_lock(&c->journal.lock);
set_bit(JOURNAL_need_flush_write, &c->journal.flags);
set_bit(JOURNAL_running, &c->journal.flags);
-
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
- bch2_dev_allocator_add(c, ca);
- percpu_ref_reinit(&ca->io_ref[WRITE]);
- }
- bch2_recalc_capacity(c);
+ bch2_journal_space_available(&c->journal);
+ spin_unlock(&c->journal.lock);
ret = bch2_fs_mark_dirty(c);
if (ret)
goto err;
- spin_lock(&c->journal.lock);
- bch2_journal_space_available(&c->journal);
- spin_unlock(&c->journal.lock);
-
ret = bch2_journal_reclaim_start(&c->journal);
if (ret)
goto err;
@@ -504,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
atomic_long_inc(&c->writes[i]);
}
#endif
- if (!early) {
- ret = bch2_fs_read_write_late(c);
- if (ret)
- goto err;
+
+ ret = bch2_copygc_start(c);
+ if (ret) {
+ bch_err_msg(c, ret, "error starting copygc thread");
+ goto err;
+ }
+
+ ret = bch2_rebalance_start(c);
+ if (ret) {
+ bch_err_msg(c, ret, "error starting rebalance thread");
+ goto err;
}
bch2_do_discards(c);
@@ -553,6 +533,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
+ bch2_free_fsck_errs(c);
bch2_fs_accounting_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
@@ -1023,6 +1004,40 @@ static void print_mount_opts(struct bch_fs *c)
printbuf_exit(&p);
}
+static bool bch2_fs_may_start(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i, flags = 0;
+
+ if (c->opts.very_degraded)
+ flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
+
+ if (c->opts.degraded)
+ flags |= BCH_FORCE_IF_DEGRADED;
+
+ if (!c->opts.degraded &&
+ !c->opts.very_degraded) {
+ mutex_lock(&c->sb_lock);
+
+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+
+ ca = bch2_dev_locked(c, i);
+
+ if (!bch2_dev_is_online(ca) &&
+ (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ ca->mi.state == BCH_MEMBER_STATE_ro)) {
+ mutex_unlock(&c->sb_lock);
+ return false;
+ }
+ }
+ mutex_unlock(&c->sb_lock);
+ }
+
+ return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
+}
+
int bch2_fs_start(struct bch_fs *c)
{
time64_t now = ktime_get_real_seconds();
@@ -1030,6 +1045,9 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
+ if (!bch2_fs_may_start(c))
+ return -BCH_ERR_insufficient_devices_to_start;
+
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
@@ -1082,13 +1100,10 @@ int bch2_fs_start(struct bch_fs *c)
wake_up(&c->ro_ref_wait);
down_write(&c->state_lock);
- if (c->opts.read_only) {
+ if (c->opts.read_only)
bch2_fs_read_only(c);
- } else {
- ret = !test_bit(BCH_FS_rw, &c->flags)
- ? bch2_fs_read_write(c)
- : bch2_fs_read_write_late(c);
- }
+ else if (!test_bit(BCH_FS_rw, &c->flags))
+ ret = bch2_fs_read_write(c);
up_write(&c->state_lock);
err:
@@ -1500,7 +1515,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
printbuf_exit(&name);
- rebalance_wakeup(c);
+ bch2_rebalance_wakeup(c);
return 0;
}
@@ -1559,40 +1574,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
}
}
-static bool bch2_fs_may_start(struct bch_fs *c)
-{
- struct bch_dev *ca;
- unsigned i, flags = 0;
-
- if (c->opts.very_degraded)
- flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
-
- if (c->opts.degraded)
- flags |= BCH_FORCE_IF_DEGRADED;
-
- if (!c->opts.degraded &&
- !c->opts.very_degraded) {
- mutex_lock(&c->sb_lock);
-
- for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
- if (!bch2_member_exists(c->disk_sb.sb, i))
- continue;
-
- ca = bch2_dev_locked(c, i);
-
- if (!bch2_dev_is_online(ca) &&
- (ca->mi.state == BCH_MEMBER_STATE_rw ||
- ca->mi.state == BCH_MEMBER_STATE_ro)) {
- mutex_unlock(&c->sb_lock);
- return false;
- }
- }
- mutex_unlock(&c->sb_lock);
- }
-
- return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
-}
-
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
bch2_dev_io_ref_stop(ca, WRITE);
@@ -1646,7 +1627,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
if (new_state == BCH_MEMBER_STATE_rw)
__bch2_dev_read_write(c, ca);
- rebalance_wakeup(c);
+ bch2_rebalance_wakeup(c);
return ret;
}
@@ -2228,11 +2209,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
}
up_write(&c->state_lock);
- if (!bch2_fs_may_start(c)) {
- ret = -BCH_ERR_insufficient_devices_to_start;
- goto err_print;
- }
-
if (!c->opts.nostart) {
ret = bch2_fs_start(c);
if (ret)
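
The hunks above move the degraded-mount policy out of bch2_fs_open() and into
bch2_fs_start(): with neither degraded option set, every rw/ro member must be
online or the start is refused; otherwise the options map to force flags for
bch2_have_enough_devs(). A minimal userspace restatement of that mapping (the
flag values below are stand-ins, not bcachefs's real ones):

#include <stdbool.h>
#include <stdio.h>

#define BCH_FORCE_IF_DEGRADED	(1u << 0)	/* stand-in value */
#define BCH_FORCE_IF_LOST	(1u << 1)	/* stand-in value */

static unsigned degraded_flags(bool degraded, bool very_degraded)
{
	unsigned flags = 0;

	if (very_degraded)
		flags |= BCH_FORCE_IF_DEGRADED | BCH_FORCE_IF_LOST;
	if (degraded)
		flags |= BCH_FORCE_IF_DEGRADED;
	return flags;
}

int main(void)
{
	/* with no flags, every rw/ro member must be online to start */
	printf("none:          0x%x\n", degraded_flags(false, false));
	printf("degraded:      0x%x\n", degraded_flags(true, false));
	printf("very_degraded: 0x%x\n", degraded_flags(false, true));
	return 0;
}
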
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index e5f003c29369..82ee333ddd21 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
bch2_set_rebalance_needs_scan(c, 0);
if (v && id == Opt_rebalance_enabled)
- rebalance_wakeup(c);
+ bch2_rebalance_wakeup(c);
- if (v && id == Opt_copygc_enabled &&
- c->copygc_thread)
- wake_up_process(c->copygc_thread);
+ if (v && id == Opt_copygc_enabled)
+ bch2_copygc_wakeup(c);
if (id == Opt_discard && !ca) {
mutex_lock(&c->sb_lock);
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index c265b102267a..782a05fe7656 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
*/
static int test_peek_end(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
@@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 6ba5071ab6dd..3e52c7f8ddd2 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len)
*--dst = *src++;
}
+#define set_flags(_map, _in, _out) \
+do { \
+ unsigned _i; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & (1 << _i)) \
+ (_out) |= _map[_i]; \
+ else \
+ (_out) &= ~_map[_i]; \
+} while (0)
+
+#define map_flags(_map, _in) \
+({ \
+ unsigned _out = 0; \
+ \
+ set_flags(_map, _in, _out); \
+ _out; \
+})
+
+#define map_flags_rev(_map, _in) \
+({ \
+ unsigned _i, _out = 0; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & _map[_i]) { \
+ (_out) |= 1 << _i; \
+ (_in) &= ~_map[_i]; \
+ } \
+ (_out); \
+})
+
+#define map_defined(_map) \
+({ \
+ unsigned _in = ~0; \
+ \
+ map_flags_rev(_map, _in); \
+})
+
#endif /* _BCACHEFS_UTIL_H */
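
The new helpers translate a flag word between two bit namespaces through a
lookup array: map_flags() maps local bit i to _map[i], map_flags_rev() maps
back and clears the bits it consumed from its input, and map_defined() feeds
~0 through map_flags_rev() to report every bit the map can produce. A small
userspace check, with the first two macros copied from the hunk and a made-up
three-entry map:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

#define set_flags(_map, _in, _out)					\
do {									\
	unsigned _i;							\
									\
	for (_i = 0; _i < ARRAY_SIZE(_map); _i++)			\
		if ((_in) & (1 << _i))					\
			(_out) |= _map[_i];				\
		else							\
			(_out) &= ~_map[_i];				\
} while (0)

#define map_flags(_map, _in)						\
({									\
	unsigned _out = 0;						\
									\
	set_flags(_map, _in, _out);					\
	_out;								\
})

#define map_flags_rev(_map, _in)					\
({									\
	unsigned _i, _out = 0;						\
									\
	for (_i = 0; _i < ARRAY_SIZE(_map); _i++)			\
		if ((_in) & _map[_i]) {					\
			(_out) |= 1 << _i;				\
			(_in) &= ~_map[_i];				\
		}							\
	(_out);								\
})

/* hypothetical mapping: local bit i <-> foreign bit demo_map[i] */
static const unsigned demo_map[] = { 1u << 4, 1u << 9, 1u << 0 };

int main(void)
{
	unsigned foreign = map_flags(demo_map, 0x5u);	/* local bits 0 and 2 */
	unsigned in = foreign, local;

	printf("foreign = 0x%x\n", foreign);		/* 0x11 */
	local = map_flags_rev(demo_map, in);
	printf("local = 0x%x leftover = 0x%x\n", local, in); /* 0x5, 0x0 */
	return 0;
}
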
diff --git a/fs/buffer.c b/fs/buffer.c
index c7abb4a029dc..7be23ff20b27 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
}
EXPORT_SYMBOL(end_buffer_write_sync);
-/*
- * Various filesystems appear to want __find_get_block to be non-blocking.
- * But it's the page lock which protects the buffers. To get around this,
- * we get exclusion from try_to_free_buffers with the blockdev mapping's
- * i_private_lock.
- *
- * Hack idea: for the blockdev mapping, i_private_lock contention
- * may be quite high. This code could TryLock the page, and if that
- * succeeds, there is no need to take i_private_lock.
- */
static struct buffer_head *
-__find_get_block_slow(struct block_device *bdev, sector_t block)
+__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
{
struct address_space *bd_mapping = bdev->bd_mapping;
const int blkbits = bd_mapping->host->i_blkbits;
@@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
if (IS_ERR(folio))
goto out;
- spin_lock(&bd_mapping->i_private_lock);
+ /*
+ * Folio lock protects the buffers. Callers that cannot block
+	 * will fall back to serializing against try_to_free_buffers() via
+ * the i_private_lock.
+ */
+ if (atomic)
+ spin_lock(&bd_mapping->i_private_lock);
+ else
+ folio_lock(folio);
+
head = folio_buffers(folio);
if (!head)
goto out_unlock;
+ /*
+	 * A noref migration in flight is serialized by the folio lock;
+	 * atomic callers cannot wait for it and must bail out.
+ */
+ if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+ WARN_ON(!atomic);
+ goto out_unlock;
+ }
+
bh = head;
do {
if (!buffer_mapped(bh))
@@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
1 << blkbits);
}
out_unlock:
- spin_unlock(&bd_mapping->i_private_lock);
+ if (atomic)
+ spin_unlock(&bd_mapping->i_private_lock);
+ else
+ folio_unlock(folio);
folio_put(folio);
out:
return ret;
@@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize)
{
- struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
+ struct buffer_head *bh;
+
+ bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
write_dirty_buffer(bh, 0);
@@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
/*
* Perform a pagecache lookup for the matching buffer. If it's there, refresh
* it in the LRU and mark it as accessed. If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated, in which case the folio is not marked accessed either.
*/
-struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+static struct buffer_head *
+find_get_block_common(struct block_device *bdev, sector_t block,
+ unsigned size, bool atomic)
{
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
/* __find_get_block_slow will mark the page accessed */
- bh = __find_get_block_slow(bdev, block);
+ bh = __find_get_block_slow(bdev, block, atomic);
if (bh)
bh_lru_install(bh);
} else
@@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
return bh;
}
+
+struct buffer_head *
+__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+{
+ return find_get_block_common(bdev, block, size, true);
+}
EXPORT_SYMBOL(__find_get_block);
+/* Same as __find_get_block(), but for callers that are allowed to sleep */
+struct buffer_head *
+__find_get_block_nonatomic(struct block_device *bdev, sector_t block,
+ unsigned size)
+{
+ return find_get_block_common(bdev, block, size, false);
+}
+EXPORT_SYMBOL(__find_get_block_nonatomic);
+
/**
* bdev_getblk - Get a buffer_head in a block device's buffer cache.
* @bdev: The block device.
@@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block);
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
- struct buffer_head *bh = __find_get_block(bdev, block, size);
+ struct buffer_head *bh;
+
+ if (gfpflags_allow_blocking(gfp))
+ bh = __find_get_block_nonatomic(bdev, block, size);
+ else
+ bh = __find_get_block(bdev, block, size);
might_alloc(gfp);
if (bh)
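
With the above, bdev_getblk() picks the lookup variant from the gfp mask:
allocations that may block use __find_get_block_nonatomic(), which takes
folio_lock() and so serializes against noref migration, while atomic callers
keep the spinlock-only __find_get_block() and simply miss when BH_Migrate is
set. A userspace model of that dispatch (the gfp bit is a stub; in the kernel
gfpflags_allow_blocking() tests __GFP_DIRECT_RECLAIM):

#include <stdbool.h>
#include <stdio.h>

#define __GFP_DIRECT_RECLAIM	(1u << 0)	/* stub bit for illustration */

static bool gfpflags_allow_blocking(unsigned gfp)
{
	return gfp & __GFP_DIRECT_RECLAIM;
}

static const char *lookup_variant(unsigned gfp)
{
	return gfpflags_allow_blocking(gfp)
		? "__find_get_block_nonatomic"	/* folio_lock(), may sleep */
		: "__find_get_block";		/* i_private_lock, atomic-safe */
}

int main(void)
{
	printf("blocking gfp -> %s\n", lookup_variant(__GFP_DIRECT_RECLAIM));
	printf("atomic gfp   -> %s\n", lookup_variant(0));
	return 0;
}
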
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 6ac2bd555e86..06cd2963e41e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2367,7 +2367,7 @@ static int fill_fscrypt_truncate(struct inode *inode,
/* Try to writeback the dirty pagecaches */
if (issued & (CEPH_CAP_FILE_BUFFER)) {
- loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1;
+ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
ret = filemap_write_and_wait_range(inode->i_mapping,
orig_pos, lend);
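
The ceph change is a plain off-by-constant: CEPH_FSCRYPT_BLOCK_SHIFT is the
log2 of the block size, so the old range end covered only SHIFT - 1 bytes past
orig_pos instead of a full fscrypt block. Illustrative arithmetic, assuming a
4 KiB fscrypt block:

#include <stdio.h>

#define CEPH_FSCRYPT_BLOCK_SHIFT	12	/* assumed: 4 KiB blocks */
#define CEPH_FSCRYPT_BLOCK_SIZE	(1 << CEPH_FSCRYPT_BLOCK_SHIFT)

int main(void)
{
	long long orig_pos = 8192;

	/* old: waits on [8192, 8203], i.e. 12 bytes, not one block */
	printf("buggy lend = %lld\n", orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1);
	/* new: waits on [8192, 12287], a full fscrypt block */
	printf("fixed lend = %lld\n", orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1);
	return 0;
}
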
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 38bc8d74f4cc..e7ecc7c8a729 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
if (!bh || !buffer_uptodate(bh))
/*
* If the block is not in the buffer cache, then it
- * must have been written out.
+			 * must have been written out or, far less likely, is
+			 * being migrated; a false failure is OK here.
*/
goto out;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f88424c28194..1e98c5be4e0a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6642,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
for (i = 0; i < count; i++) {
cond_resched();
if (is_metadata)
- bh = sb_find_get_block(inode->i_sb, block + i);
+ bh = sb_find_get_block_nonatomic(inode->i_sb,
+ block + i);
ext4_forget(handle, is_metadata, inode, bh, block + i);
}
}
diff --git a/fs/file.c b/fs/file.c
index dc3f7e120e3e..3a3146664cf3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -26,7 +26,7 @@
#include "internal.h"
-bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
+static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
{
/*
* If the reference count was already in the dead zone, then this
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 0cf0fddbee81..1467f6790747 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
bh = bh_in;
if (!bh) {
- bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
+ bh = __find_get_block_nonatomic(bdev, blocknr,
+ journal->j_blocksize);
if (bh)
BUFFER_TRACE(bh, "found on hash");
}
@@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
/* If there is a different buffer_head lying around in
* memory anywhere... */
- bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
+ bh2 = __find_get_block_nonatomic(bdev, blocknr,
+ journal->j_blocksize);
if (bh2) {
/* ... and it has RevokeValid status... */
if (bh2 != bh && buffer_revokevalid(bh2))
@@ -464,7 +466,8 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
* state machine will get very upset later on. */
if (need_cancel) {
struct buffer_head *bh2;
- bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
+ bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr,
+ bh->b_size);
if (bh2) {
if (bh2 != bh)
clear_buffer_revoked(bh2);
@@ -492,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal)
struct jbd2_revoke_record_s *record;
struct buffer_head *bh;
record = (struct jbd2_revoke_record_s *)list_entry;
- bh = __find_get_block(journal->j_fs_dev,
- record->blocknr,
- journal->j_blocksize);
+ bh = __find_get_block_nonatomic(journal->j_fs_dev,
+ record->blocknr,
+ journal->j_blocksize);
if (bh) {
clear_buffer_revoked(bh);
__brelse(bh);
diff --git a/fs/namespace.c b/fs/namespace.c
index d9ca80dcc544..98a5cd756e9a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,56 +2826,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
struct vfsmount *mnt = path->mnt;
struct dentry *dentry;
struct mountpoint *mp = ERR_PTR(-ENOENT);
+ struct path under = {};
for (;;) {
- struct mount *m;
+ struct mount *m = real_mount(mnt);
if (beneath) {
- m = real_mount(mnt);
+ path_put(&under);
read_seqlock_excl(&mount_lock);
- dentry = dget(m->mnt_mountpoint);
+ under.mnt = mntget(&m->mnt_parent->mnt);
+ under.dentry = dget(m->mnt_mountpoint);
read_sequnlock_excl(&mount_lock);
+ dentry = under.dentry;
} else {
dentry = path->dentry;
}
inode_lock(dentry->d_inode);
- if (unlikely(cant_mount(dentry))) {
- inode_unlock(dentry->d_inode);
- goto out;
- }
-
namespace_lock();
- if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) {
+ if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
+ break; // not to be mounted on
+
+ if (beneath && unlikely(m->mnt_mountpoint != dentry ||
+ &m->mnt_parent->mnt != under.mnt)) {
namespace_unlock();
inode_unlock(dentry->d_inode);
- goto out;
+ continue; // got moved
}
mnt = lookup_mnt(path);
- if (likely(!mnt))
+ if (unlikely(mnt)) {
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+ path_put(path);
+ path->mnt = mnt;
+ path->dentry = dget(mnt->mnt_root);
+ continue; // got overmounted
+ }
+ mp = get_mountpoint(dentry);
+ if (IS_ERR(mp))
break;
-
- namespace_unlock();
- inode_unlock(dentry->d_inode);
- if (beneath)
- dput(dentry);
- path_put(path);
- path->mnt = mnt;
- path->dentry = dget(mnt->mnt_root);
- }
-
- mp = get_mountpoint(dentry);
- if (IS_ERR(mp)) {
- namespace_unlock();
- inode_unlock(dentry->d_inode);
+ if (beneath) {
+ /*
+ * @under duplicates the references that will stay
+ * at least until namespace_unlock(), so the path_put()
+			 * below is safe (and OK to do under namespace_lock;
+			 * we are not dropping the final references here).
+ */
+ path_put(&under);
+ }
+ return mp;
}
-
-out:
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
if (beneath)
- dput(dentry);
-
+ path_put(&under);
return mp;
}
@@ -2886,14 +2892,11 @@ static inline struct mountpoint *lock_mount(struct path *path)
static void unlock_mount(struct mountpoint *where)
{
- struct dentry *dentry = where->m_dentry;
-
+ inode_unlock(where->m_dentry->d_inode);
read_seqlock_excl(&mount_lock);
put_mountpoint(where);
read_sequnlock_excl(&mount_lock);
-
namespace_unlock();
- inode_unlock(dentry->d_inode);
}
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
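
do_lock_mount() is restructured from lock-then-bail into a single retry loop:
pin the prospective mountpoint, take the locks, revalidate, and start over if
the target was moved or overmounted in the window. A toy userspace model of
that shape, with one injected race (every name is a stand-in, not VFS API):

#include <stdio.h>

static int generation;			/* bumped when the "mount" moves */
static int raced;			/* inject exactly one concurrent move */

static void take_locks(void)
{
	if (!raced++)
		generation++;		/* someone moved the mount meanwhile */
}

static int lock_mount_model(void)
{
	int tries = 0;

	for (;;) {
		int seen = generation;	/* like pinning @under */

		tries++;
		take_locks();		/* inode_lock() + namespace_lock() */
		if (seen != generation)	/* "got moved": unlock and retry */
			continue;
		return tries;		/* like returning a valid mp */
	}
}

int main(void)
{
	printf("pinned mountpoint after %d tries\n", lock_mount_model());
	return 0;
}
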
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f1b4b3e611cb..c7a9729dc9d0 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1249,7 +1249,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
}
for (i = 0; i < p_blocks; i++, p_blkno++) {
- bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+ bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno,
osb->sb->s_blocksize);
/* block not cached. */
if (!bh)
diff --git a/fs/splice.c b/fs/splice.c
index 90d464241f15..4d6df083e0c0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -45,7 +45,7 @@
* here if set to avoid blocking other users of this pipe if splice is
* being done on it.
*/
-static noinline void noinline pipe_clear_nowait(struct file *file)
+static noinline void pipe_clear_nowait(struct file *file)
{
fmode_t fmode = READ_ONCE(file->f_mode);
diff --git a/fs/xattr.c b/fs/xattr.c
index 02bee149ad96..fabb2a04501e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname,
return error;
filename = getname_maybe_null(pathname, at_flags);
- if (!filename) {
+ if (!filename && dfd >= 0) {
CLASS(fd, f)(dfd);
if (fd_empty(f))
error = -EBADF;
@@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname,
return error;
filename = getname_maybe_null(pathname, at_flags);
- if (!filename) {
+ if (!filename && dfd >= 0) {
CLASS(fd, f)(dfd);
if (fd_empty(f))
return -EBADF;
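
The dfd >= 0 guard matters because a NULL filename from getname_maybe_null()
can coincide with dfd == AT_FDCWD, a negative sentinel rather than a real
descriptor, so only non-negative dfd values may take the fd-based branch. A
quick demonstration:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int dfd = AT_FDCWD;	/* -100 on Linux, not a real descriptor */

	printf("AT_FDCWD = %d\n", dfd);
	if (dfd >= 0)
		puts("fd-based lookup (CLASS(fd) path)");
	else
		puts("falls through to path-based lookup");
	return 0;
}
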
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 8c541ca71872..81c94dd1d596 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -170,7 +170,8 @@ bool
xfs_zoned_need_gc(
struct xfs_mount *mp)
{
- s64 available, free;
+ s64 available, free, threshold;
+ s32 remainder;
if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE))
return false;
@@ -183,7 +184,12 @@ xfs_zoned_need_gc(
return true;
free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
- if (available < mult_frac(free, mp->m_zonegc_low_space, 100))
+
+ threshold = div_s64_rem(free, 100, &remainder);
+ threshold = threshold * mp->m_zonegc_low_space +
+ remainder * div_s64(mp->m_zonegc_low_space, 100);
+
+ if (available < threshold)
return true;
return false;
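
The xfs hunk replaces mult_frac(free, m_zonegc_low_space, 100), whose
free * pct intermediate can overflow s64, with a split computation: free/100
times the percentage, plus a remainder term that is nonzero only at 100%. The
result may round down by at most free % 100 relative to the exact value. A
userspace check of the identity, with div_s64_rem() modeled by / and %:

#include <stdint.h>
#include <stdio.h>

/* pct plays the role of m_zonegc_low_space (a 0..100 percentage) */
static int64_t safe_threshold(int64_t free, int64_t pct)
{
	int64_t q = free / 100, r = free % 100;

	/* the remainder term is nonzero only when pct == 100 */
	return q * pct + r * (pct / 100);
}

int main(void)
{
	int64_t free = 1005;

	for (int pct = 0; pct <= 100; pct += 50)
		printf("pct=%3d exact=%lld safe=%lld\n", pct,
		       (long long)(free * pct / 100),
		       (long long)safe_threshold(free, pct));
	return 0;
}
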