summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/dynroot.c4
-rw-r--r--fs/bcachefs/Kconfig7
-rw-r--r--fs/bcachefs/bcachefs.h6
-rw-r--r--fs/bcachefs/btree_journal_iter.c5
-rw-r--r--fs/bcachefs/btree_node_scan.c6
-rw-r--r--fs/bcachefs/btree_update_interior.c2
-rw-r--r--fs/bcachefs/btree_write_buffer.c8
-rw-r--r--fs/bcachefs/buckets.c3
-rw-r--r--fs/bcachefs/buckets.h5
-rw-r--r--fs/bcachefs/checksum.c247
-rw-r--r--fs/bcachefs/checksum.h3
-rw-r--r--fs/bcachefs/data_update.c2
-rw-r--r--fs/bcachefs/dirent.c4
-rw-r--r--fs/bcachefs/errcode.h2
-rw-r--r--fs/bcachefs/extents.c2
-rw-r--r--fs/bcachefs/fs-io-buffered.c17
-rw-r--r--fs/bcachefs/fs-ioctl.c2
-rw-r--r--fs/bcachefs/fsck.c44
-rw-r--r--fs/bcachefs/io_read.c27
-rw-r--r--fs/bcachefs/journal_io.c2
-rw-r--r--fs/bcachefs/recovery.c11
-rw-r--r--fs/bcachefs/sb-errors_format.h4
-rw-r--r--fs/bcachefs/super-io.c20
-rw-r--r--fs/bcachefs/super.c19
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/subpage.c4
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/btrfs/zoned.c19
-rw-r--r--fs/cachefiles/key.c2
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/dax.c1
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/erofs/Kconfig2
-rw-r--r--fs/erofs/erofs_fs.h8
-rw-r--r--fs/erofs/fileio.c2
-rw-r--r--fs/erofs/zdata.c1
-rw-r--r--fs/erofs/zmap.c5
-rw-r--r--fs/eventpoll.c10
-rw-r--r--fs/ext4/block_validity.c5
-rw-r--r--fs/ext4/inode.c75
-rw-r--r--fs/ext4/mballoc.c18
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/fuse/virtio_fs.c3
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/hfs/bnode.c6
-rw-r--r--fs/hfsplus/bnode.c6
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/namei.c89
-rw-r--r--fs/namespace.c34
-rw-r--r--fs/netfs/fscache_cache.c2
-rw-r--r--fs/netfs/fscache_cookie.c2
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/internal.h7
-rw-r--r--fs/nfs/nfs4session.h4
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfsfh.h7
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/super.c5
-rw-r--r--fs/smb/client/cifsencrypt.c16
-rw-r--r--fs/smb/client/cifsglob.h6
-rw-r--r--fs/smb/client/cifspdu.h2
-rw-r--r--fs/smb/client/cifsproto.h2
-rw-r--r--fs/smb/client/connect.c36
-rw-r--r--fs/smb/client/file.c28
-rw-r--r--fs/smb/client/inode.c25
-rw-r--r--fs/smb/client/reparse.c63
-rw-r--r--fs/smb/client/reparse.h5
-rw-r--r--fs/smb/client/sess.c60
-rw-r--r--fs/smb/client/smb1ops.c53
-rw-r--r--fs/smb/client/smb2ops.c14
-rw-r--r--fs/smb/client/smb2pdu.c11
-rw-r--r--fs/smb/common/smb2pdu.h6
-rw-r--r--fs/smb/server/connection.c4
-rw-r--r--fs/smb/server/oplock.c29
-rw-r--r--fs/smb/server/oplock.h1
-rw-r--r--fs/smb/server/smb2pdu.c4
-rw-r--r--fs/smb/server/smb_common.h2
-rw-r--r--fs/smb/server/transport_ipc.c7
-rw-r--r--fs/smb/server/transport_tcp.c14
-rw-r--r--fs/smb/server/transport_tcp.h1
-rw-r--r--fs/smb/server/vfs.c3
-rw-r--r--fs/smb/server/vfs_cache.c8
-rw-r--r--fs/stat.c32
-rw-r--r--fs/xfs/Kconfig2
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_buf_mem.c2
-rw-r--r--fs/xfs/xfs_dquot.c3
-rw-r--r--fs/xfs/xfs_fsmap.c51
-rw-r--r--fs/xfs/xfs_inode_item.c6
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_sysfs.c32
-rw-r--r--fs/xfs/xfs_trans_ail.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h28
-rw-r--r--fs/xfs/xfs_zone_alloc.c7
-rw-r--r--fs/xfs/xfs_zone_gc.c16
105 files changed, 769 insertions, 635 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c718b2e2de0e..5b4847bd2fbb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -368,6 +368,7 @@ config GRACE_PERIOD
config LOCKD
tristate
depends on FILE_LOCKING
+ select CRC32
select GRACE_PERIOD
config LOCKD_V4
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 691e0ae607a1..8c6130789fde 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -348,9 +348,9 @@ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx)
}
if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) {
- rcu_read_lock();
+ down_read(&net->cells_lock);
ret = afs_dynroot_readdir_cells(net, ctx);
- rcu_read_unlock();
+ up_read(&net->cells_lock);
}
return ret;
}
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index bf1c94e51dd0..07709b0d7688 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -4,7 +4,7 @@ config BCACHEFS_FS
depends on BLOCK
select EXPORTFS
select CLOSURES
- select LIBCRC32C
+ select CRC32
select CRC64
select FS_POSIX_ACL
select LZ4_COMPRESS
@@ -15,10 +15,9 @@ config BCACHEFS_FS
select ZLIB_INFLATE
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
- select CRYPTO
select CRYPTO_LIB_SHA256
- select CRYPTO_CHACHA20
- select CRYPTO_POLY1305
+ select CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_POLY1305
select KEYS
select RAID6_PQ
select XOR_BLOCKS
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 5d9f208a1bb7..75f7408da173 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -788,6 +788,8 @@ struct bch_fs {
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
+ DARRAY(enum bcachefs_metadata_version)
+ incompat_versions_requested;
#ifdef CONFIG_UNICODE
struct unicode_map *cf_encoding;
@@ -981,8 +983,8 @@ struct bch_fs {
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
size_t zstd_workspace_size;
- struct crypto_sync_skcipher *chacha20;
- struct crypto_shash *poly1305;
+ struct bch_key chacha20_key;
+ bool chacha20_key_set;
atomic64_t key_version;
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index d1ad1a7613c9..7d6c971db23c 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -644,8 +644,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
*/
static int journal_sort_key_cmp(const void *_l, const void *_r)
{
- cond_resched();
-
const struct journal_key *l = _l;
const struct journal_key *r = _r;
@@ -689,7 +687,8 @@ void bch2_journal_keys_put(struct bch_fs *c)
static void __journal_keys_sort(struct journal_keys *keys)
{
- sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
+ sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
+ journal_sort_key_cmp, NULL);
cond_resched();
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 8c9fdb7263fe..86acf037590c 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -183,7 +183,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
return;
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
- if (!c->chacha20)
+ if (!c->chacha20_key_set)
return;
struct nonce nonce = btree_nonce(&bn->keys, 0);
@@ -398,7 +398,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
bch2_print_string_as_lines(KERN_INFO, buf.buf);
}
- sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
+ sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
dst = 0;
darray_for_each(f->nodes, i) {
@@ -418,7 +418,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
}
f->nodes.nr = dst;
- sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+ sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
if (0 && c->opts.verbose) {
printbuf_reset(&buf);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 55fbeeb8eaaa..44b5fe430370 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1221,7 +1221,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
ret = bch2_disk_reservation_get(c, &as->disk_res,
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
- c->opts.metadata_replicas,
+ READ_ONCE(c->opts.metadata_replicas),
disk_res_flags);
if (ret)
goto err;
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index adbe576ec77e..0941fb2c026d 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -428,10 +428,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
*/
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
- sort(wb->flushing.keys.data,
- wb->flushing.keys.nr,
- sizeof(wb->flushing.keys.data[0]),
- wb_key_seq_cmp, NULL);
+ sort_nonatomic(wb->flushing.keys.data,
+ wb->flushing.keys.nr,
+ sizeof(wb->flushing.keys.data[0]),
+ wb_key_seq_cmp, NULL);
darray_for_each(wb->flushing.keys, i) {
if (!i->journal_seq)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index fea61e60a9ee..4ef261e8db4f 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage)
{
memset(usage, 0, sizeof(*usage));
- acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
+ acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage,
+ sizeof(struct bch_dev_usage_full) / sizeof(u64));
}
static u64 reserve_factor(u64 r)
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 1c38b165f48b..8d75b27a1418 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -242,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca,
/* Filesystem usage: */
-static inline unsigned dev_usage_u64s(void)
-{
- return sizeof(struct bch_dev_usage) / sizeof(u64);
-}
-
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 3726689093e3..d0a34a097b80 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -7,17 +7,12 @@
#include "super-io.h"
#include <linux/crc32c.h>
-#include <linux/crypto.h>
#include <linux/xxhash.h>
#include <linux/key.h>
#include <linux/random.h>
#include <linux/ratelimit.h>
-#include <linux/scatterlist.h>
-#include <crypto/algapi.h>
#include <crypto/chacha.h>
-#include <crypto/hash.h>
#include <crypto/poly1305.h>
-#include <crypto/skcipher.h>
#include <keys/user-type.h>
/*
@@ -96,116 +91,40 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void *
}
}
-static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
- struct nonce nonce,
- struct scatterlist *sg, size_t len)
+static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS],
+ const struct bch_key *key, struct nonce nonce)
{
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ u32 key_words[CHACHA_KEY_SIZE / sizeof(u32)];
- skcipher_request_set_sync_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
+ BUILD_BUG_ON(sizeof(key_words) != sizeof(*key));
+ memcpy(key_words, key, sizeof(key_words));
+ le32_to_cpu_array(key_words, ARRAY_SIZE(key_words));
- int ret = crypto_skcipher_encrypt(req);
- if (ret)
- pr_err("got error %i from crypto_skcipher_encrypt()", ret);
-
- return ret;
-}
-
-static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
- struct nonce nonce,
- void *buf, size_t len)
-{
- if (!is_vmalloc_addr(buf)) {
- struct scatterlist sg = {};
-
- sg_mark_end(&sg);
- sg_set_page(&sg, virt_to_page(buf), len, offset_in_page(buf));
- return do_encrypt_sg(tfm, nonce, &sg, len);
- } else {
- DARRAY_PREALLOCATED(struct scatterlist, 4) sgl;
- size_t sgl_len = 0;
- int ret;
-
- darray_init(&sgl);
-
- while (len) {
- unsigned offset = offset_in_page(buf);
- struct scatterlist sg = {
- .page_link = (unsigned long) vmalloc_to_page(buf),
- .offset = offset,
- .length = min(len, PAGE_SIZE - offset),
- };
+ BUILD_BUG_ON(sizeof(nonce) != CHACHA_IV_SIZE);
+ chacha_init(state, key_words, (const u8 *)nonce.d);
- if (darray_push(&sgl, sg)) {
- sg_mark_end(&darray_last(sgl));
- ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len);
- if (ret)
- goto err;
-
- nonce = nonce_add(nonce, sgl_len);
- sgl_len = 0;
- sgl.nr = 0;
- BUG_ON(darray_push(&sgl, sg));
- }
-
- buf += sg.length;
- len -= sg.length;
- sgl_len += sg.length;
- }
-
- sg_mark_end(&darray_last(sgl));
- ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len);
-err:
- darray_exit(&sgl);
- return ret;
- }
+ memzero_explicit(key_words, sizeof(key_words));
}
-int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
- void *buf, size_t len)
+static void bch2_chacha20(const struct bch_key *key, struct nonce nonce,
+ void *data, size_t len)
{
- struct crypto_sync_skcipher *chacha20 =
- crypto_alloc_sync_skcipher("chacha20", 0, 0);
- int ret;
-
- ret = PTR_ERR_OR_ZERO(chacha20);
- if (ret) {
- pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret));
- return ret;
- }
-
- ret = crypto_skcipher_setkey(&chacha20->base,
- (void *) key, sizeof(*key));
- if (ret) {
- pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret));
- goto err;
- }
+ u32 state[CHACHA_STATE_WORDS];
- ret = do_encrypt(chacha20, nonce, buf, len);
-err:
- crypto_free_sync_skcipher(chacha20);
- return ret;
+ bch2_chacha20_init(state, key, nonce);
+ chacha20_crypt(state, data, data, len);
+ memzero_explicit(state, sizeof(state));
}
-static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
- struct nonce nonce)
+static void bch2_poly1305_init(struct poly1305_desc_ctx *desc,
+ struct bch_fs *c, struct nonce nonce)
{
- u8 key[POLY1305_KEY_SIZE];
- int ret;
+ u8 key[POLY1305_KEY_SIZE] = { 0 };
nonce.d[3] ^= BCH_NONCE_POLY;
- memset(key, 0, sizeof(key));
- ret = do_encrypt(c->chacha20, nonce, key, sizeof(key));
- if (ret)
- return ret;
-
- desc->tfm = c->poly1305;
- crypto_shash_init(desc);
- crypto_shash_update(desc, key, sizeof(key));
- return 0;
+ bch2_chacha20(&c->chacha20_key, nonce, key, sizeof(key));
+ poly1305_init(desc, key);
}
struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
@@ -230,14 +149,13 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
case BCH_CSUM_chacha20_poly1305_80:
case BCH_CSUM_chacha20_poly1305_128: {
- SHASH_DESC_ON_STACK(desc, c->poly1305);
+ struct poly1305_desc_ctx dctx;
u8 digest[POLY1305_DIGEST_SIZE];
struct bch_csum ret = { 0 };
- gen_poly_key(c, desc, nonce);
-
- crypto_shash_update(desc, data, len);
- crypto_shash_final(desc, digest);
+ bch2_poly1305_init(&dctx, c, nonce);
+ poly1305_update(&dctx, data, len);
+ poly1305_final(&dctx, digest);
memcpy(&ret, digest, bch_crc_bytes[type]);
return ret;
@@ -253,11 +171,12 @@ int bch2_encrypt(struct bch_fs *c, unsigned type,
if (!bch2_csum_type_is_encryption(type))
return 0;
- if (bch2_fs_inconsistent_on(!c->chacha20,
+ if (bch2_fs_inconsistent_on(!c->chacha20_key_set,
c, "attempting to encrypt without encryption key"))
return -BCH_ERR_no_encryption_key;
- return do_encrypt(c->chacha20, nonce, data, len);
+ bch2_chacha20(&c->chacha20_key, nonce, data, len);
+ return 0;
}
static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
@@ -296,26 +215,26 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
case BCH_CSUM_chacha20_poly1305_80:
case BCH_CSUM_chacha20_poly1305_128: {
- SHASH_DESC_ON_STACK(desc, c->poly1305);
+ struct poly1305_desc_ctx dctx;
u8 digest[POLY1305_DIGEST_SIZE];
struct bch_csum ret = { 0 };
- gen_poly_key(c, desc, nonce);
+ bch2_poly1305_init(&dctx, c, nonce);
#ifdef CONFIG_HIGHMEM
__bio_for_each_segment(bv, bio, *iter, *iter) {
void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
- crypto_shash_update(desc, p, bv.bv_len);
+ poly1305_update(&dctx, p, bv.bv_len);
kunmap_local(p);
}
#else
__bio_for_each_bvec(bv, bio, *iter, *iter)
- crypto_shash_update(desc,
+ poly1305_update(&dctx,
page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
#endif
- crypto_shash_final(desc, digest);
+ poly1305_final(&dctx, digest);
memcpy(&ret, digest, bch_crc_bytes[type]);
return ret;
@@ -338,43 +257,33 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
{
struct bio_vec bv;
struct bvec_iter iter;
- DARRAY_PREALLOCATED(struct scatterlist, 4) sgl;
- size_t sgl_len = 0;
+ u32 chacha_state[CHACHA_STATE_WORDS];
int ret = 0;
- if (bch2_fs_inconsistent_on(!c->chacha20,
+ if (bch2_fs_inconsistent_on(!c->chacha20_key_set,
c, "attempting to encrypt without encryption key"))
return -BCH_ERR_no_encryption_key;
- darray_init(&sgl);
+ bch2_chacha20_init(chacha_state, &c->chacha20_key, nonce);
bio_for_each_segment(bv, bio, iter) {
- struct scatterlist sg = {
- .page_link = (unsigned long) bv.bv_page,
- .offset = bv.bv_offset,
- .length = bv.bv_len,
- };
-
- if (darray_push(&sgl, sg)) {
- sg_mark_end(&darray_last(sgl));
- ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len);
- if (ret)
- goto err;
-
- nonce = nonce_add(nonce, sgl_len);
- sgl_len = 0;
- sgl.nr = 0;
-
- BUG_ON(darray_push(&sgl, sg));
+ void *p;
+
+ /*
+ * chacha_crypt() assumes that the length is a multiple of
+ * CHACHA_BLOCK_SIZE on any non-final call.
+ */
+ if (!IS_ALIGNED(bv.bv_len, CHACHA_BLOCK_SIZE)) {
+ bch_err_ratelimited(c, "bio not aligned for encryption");
+ ret = -EIO;
+ break;
}
- sgl_len += sg.length;
+ p = bvec_kmap_local(&bv);
+ chacha20_crypt(chacha_state, p, p, bv.bv_len);
+ kunmap_local(p);
}
-
- sg_mark_end(&darray_last(sgl));
- ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len);
-err:
- darray_exit(&sgl);
+ memzero_explicit(chacha_state, sizeof(chacha_state));
return ret;
}
@@ -650,10 +559,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
}
/* decrypt real key: */
- ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
- &sb_key, sizeof(sb_key));
- if (ret)
- goto err;
+ bch2_chacha20(&user_key, bch2_sb_key_nonce(c), &sb_key, sizeof(sb_key));
if (bch2_key_is_encrypted(&sb_key)) {
bch_err(c, "incorrect encryption key");
@@ -668,31 +574,6 @@ err:
return ret;
}
-static int bch2_alloc_ciphers(struct bch_fs *c)
-{
- if (c->chacha20)
- return 0;
-
- struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
- int ret = PTR_ERR_OR_ZERO(chacha20);
- if (ret) {
- bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
- return ret;
- }
-
- struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0);
- ret = PTR_ERR_OR_ZERO(poly1305);
- if (ret) {
- bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
- crypto_free_sync_skcipher(chacha20);
- return ret;
- }
-
- c->chacha20 = chacha20;
- c->poly1305 = poly1305;
- return 0;
-}
-
#if 0
/*
@@ -797,35 +678,21 @@ err:
void bch2_fs_encryption_exit(struct bch_fs *c)
{
- if (c->poly1305)
- crypto_free_shash(c->poly1305);
- if (c->chacha20)
- crypto_free_sync_skcipher(c->chacha20);
+ memzero_explicit(&c->chacha20_key, sizeof(c->chacha20_key));
}
int bch2_fs_encryption_init(struct bch_fs *c)
{
struct bch_sb_field_crypt *crypt;
- struct bch_key key;
- int ret = 0;
+ int ret;
crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
if (!crypt)
- goto out;
+ return 0;
- ret = bch2_alloc_ciphers(c);
+ ret = bch2_decrypt_sb_key(c, crypt, &c->chacha20_key);
if (ret)
- goto out;
-
- ret = bch2_decrypt_sb_key(c, crypt, &key);
- if (ret)
- goto out;
-
- ret = crypto_skcipher_setkey(&c->chacha20->base,
- (void *) &key.key, sizeof(key.key));
- if (ret)
- goto out;
-out:
- memzero_explicit(&key, sizeof(key));
- return ret;
+ return ret;
+ c->chacha20_key_set = true;
+ return 0;
}
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 4ac251c8fcd8..1310782d3ae9 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -69,7 +69,6 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
bch2_csum_to_text(out, type, expected);
}
-int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *);
@@ -156,7 +155,7 @@ static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
if (type >= BCH_CSUM_NR)
return false;
- if (bch2_csum_type_is_encryption(type) && !c->chacha20)
+ if (bch2_csum_type_is_encryption(type) && !c->chacha20_key_set)
return false;
return true;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index de02ebf847ec..b211c97238ab 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -607,7 +607,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
prt_newline(out);
printbuf_indent_add(out, 2);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
- prt_printf(out, "read_done:\t\%u\n", m->read_done);
+ prt_printf(out, "read_done:\t%u\n", m->read_done);
bch2_write_op_to_text(out, &m->op);
printbuf_indent_sub(out, 2);
}
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index bf53a029f356..8488a7578115 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -287,8 +287,8 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
EBUG_ON(!dirent->v.d_casefold);
EBUG_ON(!cf_name->len);
- dirent->v.d_cf_name_block.d_name_len = name->len;
- dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len;
+ dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
+ dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len);
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index c8696f01eb14..a615e4852ded 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -287,7 +287,7 @@
x(EIO, mark_stripe) \
x(EIO, stripe_reconstruct) \
x(EIO, key_type_error) \
- x(EIO, extent_poisened) \
+ x(EIO, extent_poisoned) \
x(EIO, missing_indirect_extent) \
x(EIO, invalidate_stripe_to_dev) \
x(EIO, no_encryption_key) \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index ae7c7a177e10..dca2b8425cc0 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -139,7 +139,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return -BCH_ERR_extent_poisened;
+ return -BCH_ERR_extent_poisoned;
rcu_read_lock();
const union bch_extent_entry *entry;
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 19d4599918dc..e3a75dcca60c 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -225,11 +225,26 @@ static void bchfs_read(struct btree_trans *trans,
bch2_read_extent(trans, rbio, iter.pos,
data_btree, k, offset_into_extent, flags);
- swap(rbio->bio.bi_iter.bi_size, bytes);
+ /*
+ * Careful: there's a landmine here if bch2_read_extent() ever
+ * starts returning transaction restarts.
+ *
+ * We've changed rbio->bio.bi_iter.bi_size to be "bytes we can read
+ * from this extent" with the swap call, and we restore it
+ * below. That restore needs to come before checking for
+ * errors.
+ *
+ * But unlike __bch2_read(), we use the rbio bvec iter, not one
+ * on the stack, so we can't do the restore right after the
+ * bch2_read_extent() call: we don't own that iterator anymore
+ * if BCH_READ_last_fragment is set, since we may have submitted
+ * that rbio instead of cloning it.
+ */
if (flags & BCH_READ_last_fragment)
break;
+ swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
err:
if (ret &&
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index c1553e44e049..14886e1d4d6d 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -69,7 +69,7 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
if (ret < 0)
return ret;
- ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding);
+ ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
if (ret)
return ret;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 18308f3d64a1..7b25cedd3e40 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -321,6 +321,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)
return false;
+ /*
+ * Subvolume roots are special: older versions of subvolume roots may be
+ * disconnected, it's only the newest version that matters.
+ *
+ * We only keep a single dirent pointing to a subvolume root, i.e.
+ * older versions of snapshots will not have a different dirent pointing
+ * to the same subvolume root.
+ *
+ * This is because dirents that point to subvolumes are only visible in
+ * the parent subvolume - versioning is not needed - and keeping them
+ * around would break fsck, because when we're crossing subvolumes we
+ * don't have a consistent snapshot ID to check the inode <-> dirent
+ * relationships.
+ *
+ * Thus, a subvolume root that's been renamed after a snapshot will have
+ * a disconnected older version - that's expected.
+ *
+ * Note that taking a snapshot always updates the root inode (to update
+ * the dirent backpointer), so a subvolume root inode with
+ * BCH_INODE_has_child_snapshot is never visible.
+ */
+ if (inode->bi_subvol &&
+ (inode->bi_flags & BCH_INODE_has_child_snapshot))
+ return false;
+
return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
}
@@ -1007,6 +1032,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
+ if ((ret || dirent_points_to_inode_nowarn(d, inode)) &&
+ inode->bi_subvol &&
+ (inode->bi_flags & BCH_INODE_has_child_snapshot)) {
+ /* Older version of a renamed subvolume root: we won't have a
+ * correct dirent for it. That's expected, see
+ * inode_should_reattach().
+ *
+ * We don't clear the backpointer field when doing the rename
+ * because there might be arbitrarily many versions in older
+ * snapshots.
+ */
+ inode->bi_dir = 0;
+ inode->bi_dir_offset = 0;
+ *write_inode = true;
+ goto out;
+ }
+
if (fsck_err_on(ret,
trans, inode_points_to_missing_dirent,
"inode points to missing dirent\n%s",
@@ -1027,7 +1069,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
inode->bi_dir_offset = 0;
*write_inode = true;
}
-
+out:
ret = 0;
fsck_err:
bch2_trans_iter_exit(trans, &dirent_iter);
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 417bb0c7bbfa..def4a26a3b45 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -487,6 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
+ int orig_error = rbio->ret;
+
struct btree_trans *trans = bch2_trans_get(c);
trace_io_read_retry(&rbio->bio);
@@ -519,7 +521,9 @@ static void bch2_rbio_retry(struct work_struct *work)
if (ret) {
rbio->ret = ret;
rbio->bio.bi_status = BLK_STS_IOERR;
- } else {
+ } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
+ orig_error != -BCH_ERR_data_read_ptr_stale_race &&
+ !failed.nr) {
struct printbuf buf = PRINTBUF;
lockrestart_do(trans,
@@ -977,7 +981,8 @@ retry_pick:
goto err;
}
- if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) {
+ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) &&
+ !c->chacha20_key_set) {
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
@@ -1344,14 +1349,16 @@ err:
bch2_trans_iter_exit(trans, &iter);
- if (ret) {
- struct printbuf buf = PRINTBUF;
- lockrestart_do(trans,
- bch2_inum_offset_err_msg_trans(trans, &buf, inum,
- bvec_iter.bi_sector << 9));
- prt_printf(&buf, "read error: %s", bch2_err_str(ret));
- bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ if (unlikely(ret)) {
+ if (ret != -BCH_ERR_extent_poisoned) {
+ struct printbuf buf = PRINTBUF;
+ lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum,
+ bvec_iter.bi_sector << 9));
+ prt_printf(&buf, "data read error: %s", bch2_err_str(ret));
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
rbio->bio.bi_status = BLK_STS_IOERR;
rbio->ret = ret;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 1b7961f4f609..2a54ac79189b 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1460,7 +1460,7 @@ fsck_err:
static void journal_advance_devs_to_next_bucket(struct journal *j,
struct dev_alloc_list *devs,
- unsigned sectors, u64 seq)
+ unsigned sectors, __le64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 79fd18a5a07c..606d684e6f23 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -389,9 +389,9 @@ int bch2_journal_replay(struct bch_fs *c)
* Now, replay any remaining keys in the order in which they appear in
* the journal, unpinning those journal entries as we go:
*/
- sort(keys_sorted.data, keys_sorted.nr,
- sizeof(keys_sorted.data[0]),
- journal_sort_seq_cmp, NULL);
+ sort_nonatomic(keys_sorted.data, keys_sorted.nr,
+ sizeof(keys_sorted.data[0]),
+ journal_sort_seq_cmp, NULL);
darray_for_each(keys_sorted, kp) {
cond_resched();
@@ -1125,7 +1125,10 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- bch2_fs_journal_start(&c->journal, 1);
+ ret = bch2_fs_journal_start(&c->journal, 1);
+ if (ret)
+ goto err;
+
set_bit(BCH_FS_accounting_replay_done, &c->flags);
bch2_journal_set_replay_done(&c->journal);
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 5d43e3504386..dc53d25c7cbb 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -290,8 +290,8 @@ enum bch_fsck_flags {
x(btree_node_bkey_bad_u64s, 260, 0) \
x(btree_node_topology_empty_interior_node, 261, 0) \
x(btree_ptr_v2_min_key_bad, 262, 0) \
- x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
- x(snapshot_node_missing, 264, 0) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \
+ x(snapshot_node_missing, 264, FSCK_AUTOFIX) \
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \
x(sb_clean_entry_overrun, 267, 0) \
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index e27422b6d9c6..25b6bce05c3c 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
? 0
: -BCH_ERR_may_not_use_incompat_feature;
+ mutex_lock(&c->sb_lock);
if (!ret) {
- mutex_lock(&c->sb_lock);
SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ } else {
+ darray_for_each(c->incompat_versions_requested, i)
+ if (version == *i)
+ goto out;
+
+ darray_push(&c->incompat_versions_requested, version);
+ struct printbuf buf = PRINTBUF;
+ prt_str(&buf, "requested incompat feature ");
+ bch2_version_to_text(&buf, version);
+ prt_str(&buf, " currently not enabled");
+ prt_printf(&buf, "\n set version_upgrade=incompat to enable");
+
+ bch_notice(c, "%s", buf.buf);
+ printbuf_exit(&buf);
}
+out:
+ mutex_unlock(&c->sb_lock);
+
return ret;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index a58edde43bee..e8a17ed1615d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -70,14 +70,10 @@
#include <linux/percpu.h>
#include <linux/random.h>
#include <linux/sysfs.h>
-#include <crypto/hash.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
MODULE_DESCRIPTION("bcachefs filesystem");
-MODULE_SOFTDEP("pre: chacha20");
-MODULE_SOFTDEP("pre: poly1305");
-MODULE_SOFTDEP("pre: xxhash");
const char * const bch2_fs_flag_strs[] = {
#define x(n) #n,
@@ -593,6 +589,7 @@ static void __bch2_fs_free(struct bch_fs *c)
free_percpu(c->online_reserved);
}
+ darray_exit(&c->incompat_versions_requested);
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
free_percpu(c->usage);
@@ -1002,12 +999,6 @@ static void print_mount_opts(struct bch_fs *c)
prt_str(&p, "starting version ");
bch2_version_to_text(&p, c->sb.version);
- if (c->opts.read_only) {
- prt_str(&p, " opts=");
- first = false;
- prt_printf(&p, "ro");
- }
-
for (i = 0; i < bch2_opts_nr; i++) {
const struct bch_option *opt = &bch2_opt_table[i];
u64 v = bch2_opt_get_by_id(&c->opts, i);
@@ -1023,6 +1014,11 @@ static void print_mount_opts(struct bch_fs *c)
bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
}
+ if (c->sb.version_incompat_allowed != c->sb.version) {
+ prt_printf(&p, "\n allowing incompatible features above ");
+ bch2_version_to_text(&p, c->sb.version_incompat_allowed);
+ }
+
bch_info(c, "%s", p.buf);
printbuf_exit(&p);
}
@@ -1767,7 +1763,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
up_write(&c->state_lock);
return 0;
err:
- if (ca->mi.state == BCH_MEMBER_STATE_rw &&
+ if (test_bit(BCH_FS_rw, &c->flags) &&
+ ca->mi.state == BCH_MEMBER_STATE_rw &&
!percpu_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
up_write(&c->state_lock);
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index fa8515598341..73a2dfb854c5 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -3,9 +3,9 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
select BLK_CGROUP_PUNT_BIO
+ select CRC32
select CRYPTO
select CRYPTO_CRC32C
- select LIBCRC32C
select CRYPTO_XXHASH
select CRYPTO_SHA256
select CRYPTO_BLAKE2B
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3dd555db3d32..aa58e0663a5d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3853,7 +3853,6 @@ static int write_dev_supers(struct btrfs_device *device,
atomic_inc(&device->sb_write_errors);
continue;
}
- ASSERT(folio_order(folio) == 0);
offset = offset_in_folio(folio, bytenr);
disk_super = folio_address(folio) + offset;
@@ -3926,7 +3925,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
/* If the folio has been removed, then we know it completed. */
if (IS_ERR(folio))
continue;
- ASSERT(folio_order(folio) == 0);
/* Folio will be unlocked once the write completes. */
folio_wait_locked(folio);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 262a707d8990..71b8a825c447 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* will always return true.
* So here we need to do extra page alignment for
* filemap_range_has_page().
+ *
+ * And do not decrease page_lockend right now, as it can be 0.
*/
const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
+ const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE);
while (1) {
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
cached_state);
+ /* The same page or adjacent pages. */
+ if (page_lockend <= page_lockstart)
+ break;
/*
* We can't have ordered extents in the range, nor dirty/writeback
* pages, because we have locked the inode's VFS lock in exclusive
@@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* we do, unlock the range and retry.
*/
if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
- page_lockend))
+ page_lockend - 1))
break;
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a13d81bb56a0..63aeacc54945 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4902,6 +4902,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state,
&disk_bytenr, &disk_io_size);
+ if (ret == -EAGAIN)
+ goto out_acct;
if (ret < 0 && ret != -EIOCBQUEUED)
goto out_free;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f948f4f6431c..e17bcb034595 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3803,7 +3803,7 @@ out:
if (ret) {
if (inode)
iput(&inode->vfs_inode);
- inode = ERR_PTR(ret);
+ return ERR_PTR(ret);
}
return &inode->vfs_inode;
}
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 11dbd7be6a3b..c0a0b8b063d0 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -204,7 +204,7 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
btrfs_blocks_per_folio(fs_info, folio); \
\
btrfs_subpage_assert(fs_info, folio, start, len); \
- __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
+ __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \
__start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \
__start_bit; \
})
@@ -666,7 +666,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
btrfs_blocks_per_folio(fs_info, folio); \
const struct btrfs_subpage *subpage = folio_get_private(folio); \
\
- ASSERT(blocks_per_folio < BITS_PER_LONG); \
+ ASSERT(blocks_per_folio <= BITS_PER_LONG); \
*dst = bitmap_read(subpage->bitmaps, \
blocks_per_folio * btrfs_bitmap_nr_##name, \
blocks_per_folio); \
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 40709e2a44fc..7121d8c7a318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1139,8 +1139,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
subvol_name = btrfs_get_subvol_name_from_objectid(info,
btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
if (!IS_ERR(subvol_name)) {
- seq_puts(seq, ",subvol=");
- seq_escape(seq, subvol_name, " \t\n\\");
+ seq_show_option(seq, "subvol", subvol_name);
kfree(subvol_name);
}
return 0;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 43979891f7c8..2b66a6130269 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -2235,7 +2235,7 @@ int btrfs_verify_level_key(struct extent_buffer *eb,
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, check->level, found_level);
- return -EIO;
+ return -EUCLEAN;
}
if (!check->has_first_key)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index fb8b8b29c169..4a3e02b49f29 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1277,7 +1277,7 @@ struct zone_info {
static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
struct zone_info *info, unsigned long *active,
- struct btrfs_chunk_map *map)
+ struct btrfs_chunk_map *map, bool new)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *device;
@@ -1307,6 +1307,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
return 0;
}
+ ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical));
+
/* This zone will be used for allocation, so mark this zone non-empty. */
btrfs_dev_clear_zone_empty(device, info->physical);
@@ -1319,6 +1321,18 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
* to determine the allocation offset within the zone.
*/
WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
+
+ if (new) {
+ sector_t capacity;
+
+ capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT);
+ up_read(&dev_replace->rwsem);
+ info->alloc_offset = 0;
+ info->capacity = capacity << SECTOR_SHIFT;
+
+ return 0;
+ }
+
nofs_flag = memalloc_nofs_save();
ret = btrfs_get_dev_zone(device, info->physical, &zone);
memalloc_nofs_restore(nofs_flag);
@@ -1588,7 +1602,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
for (i = 0; i < map->num_stripes; i++) {
- ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map);
+ ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map, new);
if (ret)
goto out;
@@ -1659,7 +1673,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
* stripe.
*/
cache->alloc_offset = cache->zone_capacity;
- ret = 0;
}
out:
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
index bf935e25bdbe..b48525680e73 100644
--- a/fs/cachefiles/key.c
+++ b/fs/cachefiles/key.c
@@ -8,7 +8,7 @@
#include <linux/slab.h>
#include "internal.h"
-static const char cachefiles_charmap[64] =
+static const char cachefiles_charmap[64] __nonstring =
"0123456789" /* 0 - 9 */
"abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 7249d70e1a43..3e7def3d31c1 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,7 +3,7 @@ config CEPH_FS
tristate "Ceph distributed file system"
depends on INET
select CEPH_LIB
- select LIBCRC32C
+ select CRC32
select CRYPTO_AES
select CRYPTO
select NETFS_SUPPORT
diff --git a/fs/dax.c b/fs/dax.c
index af5045b0f476..676303419e9e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -396,6 +396,7 @@ static inline unsigned long dax_folio_put(struct folio *folio)
order = folio_order(folio);
if (!order)
return 0;
+ folio_reset_order(folio);
for (i = 0; i < (1UL << order); i++) {
struct dev_pagemap *pgmap = page_pgmap(&folio->page);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e4d6eeb29f..9c20d78e41f6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -89,12 +89,12 @@ enum {
};
static const struct fs_parameter_spec devpts_param_specs[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_s32 ("max", Opt_max),
fsparam_u32oct ("mode", Opt_mode),
fsparam_flag ("newinstance", Opt_newinstance),
fsparam_u32oct ("ptmxmode", Opt_ptmxmode),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 331e49cd1b8d..8f68ec49ad89 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -3,8 +3,8 @@
config EROFS_FS
tristate "EROFS filesystem support"
depends on BLOCK
+ select CRC32
select FS_IOMAP
- select LIBCRC32C
help
EROFS (Enhanced Read-Only File System) is a lightweight read-only
file system with modern designs (e.g. no buffer heads, inline
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 9581e9bf8192..767fb4acdc93 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -56,7 +56,7 @@ struct erofs_super_block {
union {
__le16 rootnid_2b; /* nid of root directory */
__le16 blocks_hi; /* (48BIT on) blocks count MSB */
- } rb;
+ } __packed rb;
__le64 inos; /* total valid ino # (== f_files - f_favail) */
__le64 epoch; /* base seconds used for compact inodes */
__le32 fixed_nsec; /* fixed nanoseconds for compact inodes */
@@ -148,7 +148,7 @@ union erofs_inode_i_nb {
__le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */
__le16 blocks_hi; /* total blocks count MSB */
__le16 startblk_hi; /* starting block number MSB */
-};
+} __packed;
/* 32-byte reduced form of an ondisk inode */
struct erofs_inode_compact {
@@ -369,9 +369,9 @@ struct z_erofs_map_header {
* bit 7 : pack the whole file into packed inode
*/
__u8 h_clusterbits;
- };
+ } __packed;
__le16 h_extents_hi; /* extent count MSB */
- };
+ } __packed;
};
enum {
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index bec4b56b3826..4fa0a0121288 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -32,6 +32,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
ret = 0;
}
if (rq->bio.bi_end_io) {
+ if (ret < 0 && !rq->bio.bi_status)
+ rq->bio.bi_status = errno_to_blk_status(ret);
rq->bio.bi_end_io(&rq->bio);
} else {
bio_for_each_folio_all(fi, &rq->bio) {
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 0671184d9cf1..5c061aaeeb45 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -725,7 +725,6 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
lockref_init(&pcl->lockref); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->pclustersize = map->m_plen;
- pcl->pageofs_in = pageofs_in;
pcl->length = 0;
pcl->partial = true;
pcl->next = fe->head;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 8de50df05dfe..14ea47f954f5 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -559,7 +559,8 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
pos += sizeof(__le64);
lstart = 0;
} else {
- lstart = map->m_la >> vi->z_lclusterbits;
+ lstart = round_down(map->m_la, 1 << vi->z_lclusterbits);
+ pos += (lstart >> vi->z_lclusterbits) * recsz;
pa = EROFS_NULL_ADDR;
}
@@ -614,7 +615,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
vi->z_fragmentoff = map->m_plen;
- if (recsz >= offsetof(struct z_erofs_extent, pstart_lo))
+ if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
} else if (map->m_plen) {
map->m_flags |= EROFS_MAP_MAPPED |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 100376863a44..4bc264b854c4 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1996,6 +1996,14 @@ static int ep_try_send_events(struct eventpoll *ep,
return res;
}
+static int ep_schedule_timeout(ktime_t *to)
+{
+ if (to)
+ return ktime_after(*to, ktime_get());
+ else
+ return 1;
+}
+
/**
* ep_poll - Retrieves ready events, and delivers them to the caller-supplied
* event buffer.
@@ -2103,7 +2111,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
write_unlock_irq(&ep->lock);
- if (!eavail)
+ if (!eavail && ep_schedule_timeout(to))
timed_out = !schedule_hrtimeout_range(to, slack,
HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 87ee3a17bd29..e8c5525afc67 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line,
{
__le32 *bref = p;
unsigned int blk;
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ if (journal && inode == journal->j_inode)
return 0;
while (bref < p+max) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1dc09ed5d403..94c7d2d828a6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -386,10 +386,11 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal && inode == journal->j_inode)
return 0;
+
if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
@@ -4724,22 +4725,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
inode_set_iversion_queried(inode, val);
}
-static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
-
+static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
+ const char *function, unsigned int line)
{
+ const char *err_str;
+
if (flags & EXT4_IGET_EA_INODE) {
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "missing EA_INODE flag";
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ err_str = "missing EA_INODE flag";
+ goto error;
+ }
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
- EXT4_I(inode)->i_file_acl)
- return "ea_inode with extended attributes";
+ EXT4_I(inode)->i_file_acl) {
+ err_str = "ea_inode with extended attributes";
+ goto error;
+ }
} else {
- if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "unexpected EA_INODE flag";
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ /*
+ * open_by_handle_at() could provide an old inode number
+ * that has since been reused for an ea_inode; this does
+ * not indicate filesystem corruption
+ */
+ if (flags & EXT4_IGET_HANDLE)
+ return -ESTALE;
+ err_str = "unexpected EA_INODE flag";
+ goto error;
+ }
+ }
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
+ err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
+ goto error;
}
- if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
- return "unexpected bad inode w/o EXT4_IGET_BAD";
- return NULL;
+ return 0;
+
+error:
+ ext4_error_inode(inode, function, line, 0, err_str);
+ return -EFSCORRUPTED;
}
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -4751,7 +4773,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode_info *ei;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct inode *inode;
- const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4780,10 +4801,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW)) {
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
+ ret = check_igot_inode(inode, flags, function, line);
+ if (ret) {
iput(inode);
- return ERR_PTR(-EFSCORRUPTED);
+ return ERR_PTR(ret);
}
return inode;
}
@@ -5065,13 +5086,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = -EFSCORRUPTED;
goto bad_inode;
}
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+ ret = check_igot_inode(inode, flags, function, line);
+ /*
+ * -ESTALE here means there is nothing inherently wrong with the inode,
+ * it's just not an inode we can return for an fhandle lookup.
+ */
+ if (ret == -ESTALE) {
+ brelse(iloc.bh);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-ESTALE);
}
-
+ if (ret)
+ goto bad_inode;
brelse(iloc.bh);
+
unlock_new_inode(inode);
return inode;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0d523e9fb3d5..f88424c28194 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3037,10 +3037,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
unsigned char blocksize_bits = min_t(unsigned char,
sb->s_blocksize_bits,
EXT4_MAX_BLOCK_LOG_SIZE);
- struct sg {
- struct ext4_group_info info;
- ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
- } sg;
+ DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters,
+ EXT4_MAX_BLOCK_LOG_SIZE + 2);
group--;
if (group == 0)
@@ -3048,7 +3046,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
" 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
- i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
+ i = (blocksize_bits + 2) * sizeof(sg->bb_counters[0]) +
sizeof(struct ext4_group_info);
grinfo = ext4_get_group_info(sb, group);
@@ -3068,14 +3066,14 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
* We care only about free space counters in the group info and
* these are safe to access even after the buddy has been unloaded
*/
- memcpy(&sg, grinfo, i);
- seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
- sg.info.bb_fragments, sg.info.bb_first_free);
+ memcpy(sg, grinfo, i);
+ seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free,
+ sg->bb_fragments, sg->bb_first_free);
for (i = 0; i <= 13; i++)
seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
- sg.info.bb_counters[i] : 0);
+ sg->bb_counters[i] : 0);
seq_puts(seq, " ]");
- if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(sg))
seq_puts(seq, " Block bitmap corrupted!");
seq_putc(seq, '\n');
return 0;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cb5cb33b1d91..e9712e64ec8f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1971,7 +1971,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
* split it in half by count; each resulting block will have at least
* half the space free.
*/
- if (i > 0)
+ if (i >= 0)
split = count - move;
else
split = count/2;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 2c7b24cb67ad..53c2626e90e7 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1669,6 +1669,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
unsigned int virtqueue_size;
int err = -EIO;
+ if (!fsc->source)
+ return invalf(fsc, "No source specified");
+
/* This gets a reference on virtio_fs object. This ptr gets installed
* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
* to drop the reference to this object.
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index be7f87a8e11a..7bd231d16d4a 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,7 +4,6 @@ config GFS2_FS
select BUFFER_HEAD
select FS_POSIX_ACL
select CRC32
- select LIBCRC32C
select QUOTACTL
select FS_IOMAP
help
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 6add6ebfef89..cb823a8a6ba9 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
else
key_len = tree->max_key_len + 1;
+ if (key_len > sizeof(hfs_btree_key) || key_len < 1) {
+ memset(key, 0, sizeof(hfs_btree_key));
+ pr_err("hfs: Invalid key length: %d\n", key_len);
+ return;
+ }
+
hfs_bnode_read(node, key, off, key_len);
}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 87974d5e6791..079ea80534f7 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
else
key_len = tree->max_key_len + 2;
+ if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) {
+ memset(key, 0, sizeof(hfsplus_btree_key));
+ pr_err("hfsplus: Invalid key length: %d\n", key_len);
+ return;
+ }
+
hfs_bnode_read(node, key, off, key_len);
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 31553372b33a..5b08bd417b28 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -259,7 +259,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
}
/* truncate len if we find any trailing uptodate block(s) */
- for ( ; i <= last; i++) {
+ while (++i <= last) {
if (ifs_block_is_uptodate(ifs, i)) {
plen -= (last - i + 1) * block_size;
last = i - 1;
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 35768a63fb1d..421d247fae52 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -180,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb,
return NULL;
return isofs_export_iget(sb,
- fh_len > 2 ? ifid->parent_block : 0,
+ fh_len > 3 ? ifid->parent_block : 0,
ifid->parent_offset,
fh_len > 4 ? ifid->parent_generation : 0);
}
diff --git a/fs/namei.c b/fs/namei.c
index 360a86ca1f02..84a0e0b0111c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -125,9 +125,9 @@
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
-static inline void initname(struct filename *name)
+static inline void initname(struct filename *name, const char __user *uptr)
{
- name->uptr = NULL;
+ name->uptr = uptr;
name->aname = NULL;
atomic_set(&name->refcnt, 1);
}
@@ -210,7 +210,7 @@ getname_flags(const char __user *filename, int flags)
return ERR_PTR(-ENAMETOOLONG);
}
}
- initname(result);
+ initname(result, filename);
audit_getname(result);
return result;
}
@@ -268,7 +268,7 @@ struct filename *getname_kernel(const char * filename)
return ERR_PTR(-ENAMETOOLONG);
}
memcpy((char *)result->name, filename, len);
- initname(result);
+ initname(result, NULL);
audit_getname(result);
return result;
}
@@ -1665,27 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name,
return dentry;
}
-/*
- * Parent directory has inode locked exclusive. This is one
- * and only case when ->lookup() gets called on non in-lookup
- * dentries - as the matter of fact, this only gets called
- * when directory is guaranteed to have no in-lookup children
- * at all.
- * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
- * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
- */
-struct dentry *lookup_one_qstr_excl(const struct qstr *name,
- struct dentry *base,
- unsigned int flags)
+static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name,
+ struct dentry *base,
+ unsigned int flags)
{
- struct dentry *dentry = lookup_dcache(name, base, flags);
+ struct dentry *dentry;
struct dentry *old;
- struct inode *dir = base->d_inode;
+ struct inode *dir;
+ dentry = lookup_dcache(name, base, flags);
if (dentry)
- goto found;
+ return dentry;
/* Don't create child dentry for a dead directory. */
+ dir = base->d_inode;
if (unlikely(IS_DEADDIR(dir)))
return ERR_PTR(-ENOENT);
@@ -1698,7 +1691,24 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
dput(dentry);
dentry = old;
}
-found:
+ return dentry;
+}
+
+/*
+ * Parent directory has inode locked exclusive. This is one
+ * and only case when ->lookup() gets called on non in-lookup
+ * dentries - as the matter of fact, this only gets called
+ * when directory is guaranteed to have no in-lookup children
+ * at all.
+ * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
+ * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
+ */
+struct dentry *lookup_one_qstr_excl(const struct qstr *name,
+ struct dentry *base, unsigned int flags)
+{
+ struct dentry *dentry;
+
+ dentry = lookup_one_qstr_excl_raw(name, base, flags);
if (IS_ERR(dentry))
return dentry;
if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
@@ -2742,23 +2752,48 @@ static int filename_parentat(int dfd, struct filename *name,
/* does lookup, returns the object with parent locked */
static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path)
{
+ struct path parent_path __free(path_put) = {};
struct dentry *d;
struct qstr last;
int type, error;
- error = filename_parentat(dfd, name, 0, path, &last, &type);
+ error = filename_parentat(dfd, name, 0, &parent_path, &last, &type);
if (error)
return ERR_PTR(error);
- if (unlikely(type != LAST_NORM)) {
- path_put(path);
+ if (unlikely(type != LAST_NORM))
return ERR_PTR(-EINVAL);
+ inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
+ d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+ if (IS_ERR(d)) {
+ inode_unlock(parent_path.dentry->d_inode);
+ return d;
}
- inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- d = lookup_one_qstr_excl(&last, path->dentry, 0);
+ path->dentry = no_free_ptr(parent_path.dentry);
+ path->mnt = no_free_ptr(parent_path.mnt);
+ return d;
+}
+
+struct dentry *kern_path_locked_negative(const char *name, struct path *path)
+{
+ struct path parent_path __free(path_put) = {};
+ struct filename *filename __free(putname) = getname_kernel(name);
+ struct dentry *d;
+ struct qstr last;
+ int type, error;
+
+ error = filename_parentat(AT_FDCWD, filename, 0, &parent_path, &last, &type);
+ if (error)
+ return ERR_PTR(error);
+ if (unlikely(type != LAST_NORM))
+ return ERR_PTR(-EINVAL);
+ inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
+ d = lookup_one_qstr_excl_raw(&last, parent_path.dentry, 0);
if (IS_ERR(d)) {
- inode_unlock(path->dentry->d_inode);
- path_put(path);
+ inode_unlock(parent_path.dentry->d_inode);
+ return d;
}
+ path->dentry = no_free_ptr(parent_path.dentry);
+ path->mnt = no_free_ptr(parent_path.mnt);
return d;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 14935a0500a2..d9ca80dcc544 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1830,6 +1830,8 @@ static inline void namespace_lock(void)
down_write(&namespace_sem);
}
+DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock())
+
enum umount_tree_flags {
UMOUNT_SYNC = 1,
UMOUNT_PROPAGATE = 2,
@@ -2383,7 +2385,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
return;
}
- scoped_guard(rwsem_write, &namespace_sem) {
+ scoped_guard(namespace_lock, &namespace_sem) {
ns = m->mnt_ns;
if (!must_dissolve(ns))
return;
@@ -5189,8 +5191,8 @@ static void finish_mount_kattr(struct mount_kattr *kattr)
mnt_idmap_put(kattr->mnt_idmap);
}
-static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
- struct mount_kattr *kattr)
+static int wants_mount_setattr(struct mount_attr __user *uattr, size_t usize,
+ struct mount_kattr *kattr)
{
int ret;
struct mount_attr attr;
@@ -5213,9 +5215,13 @@ static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
if (attr.attr_set == 0 &&
attr.attr_clr == 0 &&
attr.propagation == 0)
- return 0;
+ return 0; /* Tell caller to not bother. */
+
+ ret = build_mount_kattr(&attr, usize, kattr);
+ if (ret < 0)
+ return ret;
- return build_mount_kattr(&attr, usize, kattr);
+ return 1;
}
SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
@@ -5247,8 +5253,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;
- err = copy_mount_setattr(uattr, usize, &kattr);
- if (err)
+ err = wants_mount_setattr(uattr, usize, &kattr);
+ if (err <= 0)
return err;
err = user_path_at(dfd, path, kattr.lookup_flags, &target);
@@ -5282,15 +5288,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;
- ret = copy_mount_setattr(uattr, usize, &kattr);
- if (ret)
+ ret = wants_mount_setattr(uattr, usize, &kattr);
+ if (ret < 0)
return ret;
- ret = do_mount_setattr(&file->f_path, &kattr);
- if (ret)
- return ret;
+ if (ret) {
+ ret = do_mount_setattr(&file->f_path, &kattr);
+ if (ret)
+ return ret;
- finish_mount_kattr(&kattr);
+ finish_mount_kattr(&kattr);
+ }
}
fd = get_unused_fd_flags(flags & O_CLOEXEC);
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..8f70f8da064b 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -372,7 +372,7 @@ void fscache_withdraw_cache(struct fscache_cache *cache)
EXPORT_SYMBOL(fscache_withdraw_cache);
#ifdef CONFIG_PROC_FS
-static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] = "-PAEW";
+static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] __nonstring = "-PAEW";
/*
* Generate a list of caches in /proc/fs/fscache/caches
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index d4d4b3a8b106..3d56fc73435f 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -29,7 +29,7 @@ static LIST_HEAD(fscache_cookie_lru);
static DEFINE_SPINLOCK(fscache_cookie_lru_lock);
DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out);
static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker);
-static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD";
+static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] __nonstring = "-LCAIFUWRD";
static unsigned int fscache_lru_cookie_timeout = 10 * HZ;
void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 4e3e62040831..70ecc8f5f210 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -127,11 +127,13 @@ static int __init netfs_init(void)
if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0)
goto error_subreqpool;
+#ifdef CONFIG_PROC_FS
if (!proc_mkdir("fs/netfs", NULL))
goto error_proc;
if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL,
&netfs_requests_seq_ops))
goto error_procfile;
+#endif
#ifdef CONFIG_FSCACHE_STATS
if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL,
netfs_stats_show))
@@ -144,9 +146,11 @@ static int __init netfs_init(void)
return 0;
error_fscache:
+#ifdef CONFIG_PROC_FS
error_procfile:
remove_proc_subtree("fs/netfs", NULL);
error_proc:
+#endif
mempool_exit(&netfs_subrequest_pool);
error_subreqpool:
kmem_cache_destroy(netfs_subrequest_slab);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index d3f76101ad4b..07932ce9246c 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -2,6 +2,7 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING && MULTIUSER
+ select CRC32
select LOCKD
select SUNRPC
select NFS_COMMON
@@ -196,7 +197,6 @@ config NFS_USE_KERNEL_DNS
config NFS_DEBUG
bool
depends on NFS_FS && SUNRPC_DEBUG
- select CRC32
default y
config NFS_DISABLE_UDP_SUPPORT
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ec8d32d0e2e9..6655e5f32ec6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -899,18 +899,11 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
-#ifdef CONFIG_CRC32
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
NFS4_STATEID_OTHER_SIZE);
}
-#else
-static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
-{
- return 0;
-}
-#endif
static inline bool nfs_current_task_exiting(void)
{
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 351616c61df5..f9c291e2165c 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst,
memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN);
}
-#ifdef CONFIG_CRC32
/*
* nfs_session_id_hash - calculate the crc32 hash for the session id
* @session - pointer to session
*/
#define nfs_session_id_hash(sess_id) \
(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
-#else
-#define nfs_session_id_hash(session) (0)
-#endif
#else /* defined(CONFIG_NFS_V4_1) */
static inline int nfs4_init_session(struct nfs_client *clp)
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 792d3fed1b45..731a88f6313e 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -4,6 +4,7 @@ config NFSD
depends on INET
depends on FILE_LOCKING
depends on FSNOTIFY
+ select CRC32
select LOCKD
select SUNRPC
select EXPORTFS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2041268b398a..59a693f22452 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5430,7 +5430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
queued = nfsd4_run_cb(&dp->dl_recall);
WARN_ON_ONCE(!queued);
if (!queued)
- nfs4_put_stid(&dp->dl_stid);
+ refcount_dec(&dp->dl_stid.sc_count);
}
/* Called from break_lease() with flc_lock held. */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 876152a91f12..5103c2f4d225 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
return true;
}
-#ifdef CONFIG_CRC32
/**
* knfsd_fh_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
@@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
}
-#else
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
-{
- return 0;
-}
-#endif
/**
* fh_clear_pre_post_attrs - Reset pre/post attributes
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6f2f8f4cfbbc..aef942a758ce 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -541,8 +541,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
bool ovl_is_metacopy_dentry(struct dentry *dentry);
char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
int ovl_ensure_verity_loaded(struct path *path);
-int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path,
- u8 *digest_buf, int *buf_length);
int ovl_validate_verity(struct ovl_fs *ofs,
struct path *metapath,
struct path *datapath);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b63474d1b064..e19940d649ca 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1138,6 +1138,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
return ERR_PTR(-EINVAL);
}
+ if (ctx->nr == ctx->nr_data) {
+ pr_err("at least one non-data lowerdir is required\n");
+ return ERR_PTR(-EINVAL);
+ }
+
err = -EINVAL;
for (i = 0; i < ctx->nr; i++) {
l = &ctx->lower[i];
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index e69968e88fe7..35892df7335c 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -704,18 +704,12 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server)
cifs_free_hash(&server->secmech.md5);
cifs_free_hash(&server->secmech.sha512);
- if (!SERVER_IS_CHAN(server)) {
- if (server->secmech.enc) {
- crypto_free_aead(server->secmech.enc);
- server->secmech.enc = NULL;
- }
-
- if (server->secmech.dec) {
- crypto_free_aead(server->secmech.dec);
- server->secmech.dec = NULL;
- }
- } else {
+ if (server->secmech.enc) {
+ crypto_free_aead(server->secmech.enc);
server->secmech.enc = NULL;
+ }
+ if (server->secmech.dec) {
+ crypto_free_aead(server->secmech.dec);
server->secmech.dec = NULL;
}
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 07c4688ec4c9..3b32116b0b49 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -625,10 +625,8 @@ struct smb_version_operations {
bool (*is_status_io_timeout)(char *buf);
/* Check for STATUS_NETWORK_NAME_DELETED */
bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
- int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb,
- const char *full_path,
- struct kvec *rsp_iov,
- struct cifs_open_info_data *data);
+ struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov,
+ u32 *plen);
int (*create_reparse_symlink)(const unsigned int xid,
struct inode *inode,
struct dentry *dentry,
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 48d0d6f439cf..18d67ab113f0 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -2256,6 +2256,8 @@ typedef struct {
#define FILE_SUPPORTS_ENCRYPTION 0x00020000
#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400
+#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200
#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index cfcc07905bdf..59f6fdfe560e 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -163,6 +163,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
struct cifsFileInfo **ret_file);
+extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
+ struct file *file);
extern unsigned int smbCalcSize(void *buf);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f298e86a3c1f..df976ce6aed9 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -300,7 +300,6 @@ cifs_abort_connection(struct TCP_Server_Info *server)
server->ssocket->flags);
sock_release(server->ssocket);
server->ssocket = NULL;
- put_net(cifs_net_ns(server));
}
server->sequence_number = 0;
server->session_estab = false;
@@ -1074,13 +1073,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
msleep(125);
if (cifs_rdma_enabled(server))
smbd_destroy(server);
-
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
-
- /* Release netns reference for the socket. */
- put_net(cifs_net_ns(server));
}
if (!list_empty(&server->pending_mid_q)) {
@@ -1128,7 +1123,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
*/
}
- /* Release netns reference for this server. */
put_net(cifs_net_ns(server));
kfree(server->leaf_fullpath);
kfree(server->hostname);
@@ -1774,8 +1768,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
tcp_ses->ops = ctx->ops;
tcp_ses->vals = ctx->vals;
-
- /* Grab netns reference for this server. */
cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
tcp_ses->sign = ctx->sign;
@@ -1903,7 +1895,6 @@ smbd_connected:
out_err_crypto_release:
cifs_crypto_secmech_release(tcp_ses);
- /* Release netns reference for this server. */
put_net(cifs_net_ns(tcp_ses));
out_err:
@@ -1912,10 +1903,8 @@ out_err:
cifs_put_tcp_session(tcp_ses->primary_server, false);
kfree(tcp_ses->hostname);
kfree(tcp_ses->leaf_fullpath);
- if (tcp_ses->ssocket) {
+ if (tcp_ses->ssocket)
sock_release(tcp_ses->ssocket);
- put_net(cifs_net_ns(tcp_ses));
- }
kfree(tcp_ses);
}
return ERR_PTR(rc);
@@ -2556,6 +2545,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
return 0;
if (tcon->nodelete != ctx->nodelete)
return 0;
+ if (tcon->posix_extensions != ctx->linux_ext)
+ return 0;
return 1;
}
@@ -3357,24 +3348,20 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket = server->ssocket;
} else {
struct net *net = cifs_net_ns(server);
+ struct sock *sk;
- rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket);
+ rc = __sock_create(net, sfamily, SOCK_STREAM,
+ IPPROTO_TCP, &server->ssocket, 1);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
- /*
- * Grab netns reference for the socket.
- *
- * This reference will be released in several situations:
- * - In the failure path before the cifsd thread is started.
- * - In the all place where server->socket is released, it is
- * also set to NULL.
- * - Ultimately in clean_demultiplex_info(), during the final
- * teardown.
- */
- get_net(net);
+ sk = server->ssocket->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
@@ -3426,7 +3413,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (rc < 0) {
cifs_dbg(FYI, "Error %d connecting to server\n", rc);
trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc);
- put_net(cifs_net_ns(server));
sock_release(socket);
server->ssocket = NULL;
return rc;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 8407fb108664..9e8f404b9e56 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -1007,6 +1007,11 @@ int cifs_open(struct inode *inode, struct file *file)
} else {
_cifsFileInfo_put(cfile, true, false);
}
+ } else {
+ /* hard link on the deferred close file */
+ rc = cifs_get_hardlink_path(tcon, inode, file);
+ if (rc)
+ cifs_close_deferred_file(CIFS_I(inode));
}
if (server->oplocks)
@@ -2071,6 +2076,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest)
list_move(li, dest);
}
+int
+cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
+ struct file *file)
+{
+ struct cifsFileInfo *open_file = NULL;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ int rc = 0;
+
+ spin_lock(&tcon->open_file_lock);
+ spin_lock(&cinode->open_file_lock);
+
+ list_for_each_entry(open_file, &cinode->openFileList, flist) {
+ if (file->f_flags == open_file->f_flags) {
+ rc = -EINVAL;
+ break;
+ }
+ }
+
+ spin_unlock(&cinode->open_file_lock);
+ spin_unlock(&tcon->open_file_lock);
+ return rc;
+}
+
void
cifs_free_llist(struct list_head *llist)
{
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index a00a9d91d0da..75be4b46bc6f 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1203,18 +1203,17 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
goto out;
}
break;
- case IO_REPARSE_TAG_MOUNT_POINT:
- cifs_create_junction_fattr(fattr, sb);
- rc = 0;
- goto out;
default:
/* Check for cached reparse point data */
if (data->symlink_target || data->reparse.buf) {
rc = 0;
- } else if (iov && server->ops->parse_reparse_point) {
- rc = server->ops->parse_reparse_point(cifs_sb,
- full_path,
- iov, data);
+ } else if (iov && server->ops->get_reparse_point_buffer) {
+ struct reparse_data_buffer *reparse_buf;
+ u32 reparse_len;
+
+ reparse_buf = server->ops->get_reparse_point_buffer(iov, &reparse_len);
+ rc = parse_reparse_point(reparse_buf, reparse_len,
+ cifs_sb, full_path, data);
/*
* If the reparse point was not handled but it is the
* name surrogate which points to directory, then treat
@@ -1228,6 +1227,16 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
cifs_create_junction_fattr(fattr, sb);
goto out;
}
+ /*
+ * If the reparse point is unsupported by the Linux SMB
+ * client then let the SMB server process it, so mask
+ * the -EOPNOTSUPP error code. This allows the Linux SMB
+ * client to send an SMB OPEN request to the server. If the
+ * server does not support this reparse point either, it
+ * will return an error when the path is opened.
+ */
+ if (rc == -EOPNOTSUPP)
+ rc = 0;
}
if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) {
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 2b9e9885dc42..bb25e77c5540 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -542,12 +542,12 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer **buf,
kfree(symname_utf16);
return -ENOMEM;
}
- /* Flag 0x02000000 is unknown, but all wsl symlinks have this value */
- symlink_buf->Flags = cpu_to_le32(0x02000000);
- /* PathBuffer is in UTF-8 but without trailing null-term byte */
+ /* Version field must be set to 2 (MS-FSCC 2.1.2.7) */
+ symlink_buf->Version = cpu_to_le32(2);
+ /* Target for Version 2 is in UTF-8 but without trailing null-term byte */
symname_utf8_len = utf16s_to_utf8s((wchar_t *)symname_utf16, symname_utf16_len/2,
UTF16_LITTLE_ENDIAN,
- symlink_buf->PathBuffer,
+ symlink_buf->Target,
symname_utf8_maxlen);
*buf = (struct reparse_data_buffer *)symlink_buf;
buf_len = sizeof(struct reparse_wsl_symlink_data_buffer) + symname_utf8_len;
@@ -1016,29 +1016,36 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf
struct cifs_open_info_data *data)
{
int len = le16_to_cpu(buf->ReparseDataLength);
+ int data_offset = offsetof(typeof(*buf), Target) - offsetof(typeof(*buf), Version);
int symname_utf8_len;
__le16 *symname_utf16;
int symname_utf16_len;
- if (len <= sizeof(buf->Flags)) {
+ if (len <= data_offset) {
cifs_dbg(VFS, "srv returned malformed wsl symlink buffer\n");
return -EIO;
}
- /* PathBuffer is in UTF-8 but without trailing null-term byte */
- symname_utf8_len = len - sizeof(buf->Flags);
+ /* MS-FSCC 2.1.2.7 defines layout of the Target field only for Version 2. */
+ if (le32_to_cpu(buf->Version) != 2) {
+ cifs_dbg(VFS, "srv returned unsupported wsl symlink version %u\n", le32_to_cpu(buf->Version));
+ return -EIO;
+ }
+
+ /* Target for Version 2 is in UTF-8 but without trailing null-term byte */
+ symname_utf8_len = len - data_offset;
/*
* Check that buffer does not contain null byte
* because Linux cannot process symlink with null byte.
*/
- if (strnlen(buf->PathBuffer, symname_utf8_len) != symname_utf8_len) {
+ if (strnlen(buf->Target, symname_utf8_len) != symname_utf8_len) {
cifs_dbg(VFS, "srv returned null byte in wsl symlink target location\n");
return -EIO;
}
symname_utf16 = kzalloc(symname_utf8_len * 2, GFP_KERNEL);
if (!symname_utf16)
return -ENOMEM;
- symname_utf16_len = utf8s_to_utf16s(buf->PathBuffer, symname_utf8_len,
+ symname_utf16_len = utf8s_to_utf16s(buf->Target, symname_utf8_len,
UTF16_LITTLE_ENDIAN,
(wchar_t *) symname_utf16, symname_utf8_len * 2);
if (symname_utf16_len < 0) {
@@ -1062,8 +1069,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
const char *full_path,
struct cifs_open_info_data *data)
{
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-
data->reparse.buf = buf;
/* See MS-FSCC 2.1.2 */
@@ -1090,24 +1095,17 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
}
return 0;
default:
- cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n",
- le32_to_cpu(buf->ReparseTag));
return -EOPNOTSUPP;
}
}
-int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
- const char *full_path,
- struct kvec *rsp_iov,
- struct cifs_open_info_data *data)
+struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov,
+ u32 *plen)
{
- struct reparse_data_buffer *buf;
struct smb2_ioctl_rsp *io = rsp_iov->iov_base;
- u32 plen = le32_to_cpu(io->OutputCount);
-
- buf = (struct reparse_data_buffer *)((u8 *)io +
- le32_to_cpu(io->OutputOffset));
- return parse_reparse_point(buf, plen, cifs_sb, full_path, data);
+ *plen = le32_to_cpu(io->OutputCount);
+ return (struct reparse_data_buffer *)((u8 *)io +
+ le32_to_cpu(io->OutputOffset));
}
static bool wsl_to_fattr(struct cifs_open_info_data *data,
@@ -1233,16 +1231,6 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
bool ok;
switch (tag) {
- case IO_REPARSE_TAG_INTERNAL:
- if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY))
- return false;
- fallthrough;
- case IO_REPARSE_TAG_DFS:
- case IO_REPARSE_TAG_DFSR:
- case IO_REPARSE_TAG_MOUNT_POINT:
- /* See cifs_create_junction_fattr() */
- fattr->cf_mode = S_IFDIR | 0711;
- break;
case IO_REPARSE_TAG_LX_SYMLINK:
case IO_REPARSE_TAG_LX_FIFO:
case IO_REPARSE_TAG_AF_UNIX:
@@ -1262,7 +1250,14 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
fattr->cf_mode |= S_IFLNK;
break;
default:
- return false;
+ if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY))
+ return false;
+ if (!IS_REPARSE_TAG_NAME_SURROGATE(tag) &&
+ tag != IO_REPARSE_TAG_INTERNAL)
+ return false;
+ /* See cifs_create_junction_fattr() */
+ fattr->cf_mode = S_IFDIR | 0711;
+ break;
}
fattr->cf_dtype = S_DT(fattr->cf_mode);
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h
index c0be5ab45a78..08de853b36a8 100644
--- a/fs/smb/client/reparse.h
+++ b/fs/smb/client/reparse.h
@@ -135,9 +135,6 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
int smb2_mknod_reparse(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
const char *full_path, umode_t mode, dev_t dev);
-int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
- const char *full_path,
- struct kvec *rsp_iov,
- struct cifs_open_info_data *data);
+struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, u32 *len);
#endif /* _CIFS_REPARSE_H */
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index f2ca5963cd9d..b3fa9ee26912 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -680,6 +680,22 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
*pbcc_area = bcc_ptr;
}
+static void
+ascii_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
+{
+ char *bcc_ptr = *pbcc_area;
+
+ strcpy(bcc_ptr, "Linux version ");
+ bcc_ptr += strlen("Linux version ");
+ strcpy(bcc_ptr, init_utsname()->release);
+ bcc_ptr += strlen(init_utsname()->release) + 1;
+
+ strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
+ bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
+
+ *pbcc_area = bcc_ptr;
+}
+
static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
@@ -704,6 +720,25 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
*pbcc_area = bcc_ptr;
}
+static void ascii_domain_string(char **pbcc_area, struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
+{
+ char *bcc_ptr = *pbcc_area;
+ int len;
+
+ /* copy domain */
+ if (ses->domainName != NULL) {
+ len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ if (WARN_ON_ONCE(len < 0))
+ len = CIFS_MAX_DOMAINNAME_LEN - 1;
+ bcc_ptr += len;
+ } /* else we send a null domain name so server will default to its own domain */
+ *bcc_ptr = 0;
+ bcc_ptr++;
+
+ *pbcc_area = bcc_ptr;
+}
+
static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
@@ -749,25 +784,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
*bcc_ptr = 0;
bcc_ptr++; /* account for null termination */
- /* copy domain */
- if (ses->domainName != NULL) {
- len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
- if (WARN_ON_ONCE(len < 0))
- len = CIFS_MAX_DOMAINNAME_LEN - 1;
- bcc_ptr += len;
- } /* else we send a null domain name so server will default to its own domain */
- *bcc_ptr = 0;
- bcc_ptr++;
-
/* BB check for overflow here */
- strcpy(bcc_ptr, "Linux version ");
- bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, init_utsname()->release);
- bcc_ptr += strlen(init_utsname()->release) + 1;
-
- strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
- bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
+ ascii_domain_string(&bcc_ptr, ses, nls_cp);
+ ascii_oslm_strings(&bcc_ptr, nls_cp);
*pbcc_area = bcc_ptr;
}
@@ -1570,7 +1590,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
sess_data->iov[1].iov_len = msg->secblob_len;
pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len);
- if (ses->capabilities & CAP_UNICODE) {
+ if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) {
/* unicode strings must be word aligned */
if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
*bcc_ptr = 0;
@@ -1579,8 +1599,8 @@ sess_auth_kerberos(struct sess_data *sess_data)
unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
} else {
- /* BB: is this right? */
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
+ ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+ ascii_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
}
sess_data->iov[2].iov_len = (long) bcc_ptr -
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 26df807fbe7a..0adeec652dc1 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -568,6 +568,42 @@ static int cifs_query_path_info(const unsigned int xid,
data->reparse_point = le32_to_cpu(fi.Attributes) & ATTR_REPARSE;
}
+#ifdef CONFIG_CIFS_XATTR
+ /*
+ * For WSL CHR and BLK reparse points it is required to fetch
+ * EA $LXDEV which contains major and minor device numbers.
+ */
+ if (!rc && data->reparse_point) {
+ struct smb2_file_full_ea_info *ea;
+
+ ea = (struct smb2_file_full_ea_info *)data->wsl.eas;
+ rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV,
+ &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1],
+ SMB2_WSL_XATTR_DEV_SIZE, cifs_sb);
+ if (rc == SMB2_WSL_XATTR_DEV_SIZE) {
+ ea->next_entry_offset = cpu_to_le32(0);
+ ea->flags = 0;
+ ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN;
+ ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE);
+ memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1);
+ data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 +
+ SMB2_WSL_XATTR_DEV_SIZE;
+ rc = 0;
+ } else if (rc >= 0) {
+ /* It is an error if EA $LXDEV has wrong size. */
+ rc = -EINVAL;
+ } else {
+ /*
+ * In all other cases ignore the error if fetching
+ * EA $LXDEV failed. It is needed only for WSL CHR
+ * and BLK reparse points, and wsl_to_fattr()
+ * handles the case when the EA is missing.
+ */
+ rc = 0;
+ }
+ }
+#endif
+
return rc;
}
@@ -970,18 +1006,13 @@ static int cifs_query_symlink(const unsigned int xid,
return rc;
}
-static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb,
- const char *full_path,
- struct kvec *rsp_iov,
- struct cifs_open_info_data *data)
+static struct reparse_data_buffer *cifs_get_reparse_point_buffer(const struct kvec *rsp_iov,
+ u32 *plen)
{
- struct reparse_data_buffer *buf;
TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base;
- u32 plen = le16_to_cpu(io->ByteCount);
-
- buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
- le32_to_cpu(io->DataOffset));
- return parse_reparse_point(buf, plen, cifs_sb, full_path, data);
+ *plen = le16_to_cpu(io->ByteCount);
+ return (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
+ le32_to_cpu(io->DataOffset));
}
static bool
@@ -1157,7 +1188,7 @@ struct smb_version_operations smb1_operations = {
.rename = CIFSSMBRename,
.create_hardlink = CIFSCreateHardLink,
.query_symlink = cifs_query_symlink,
- .parse_reparse_point = cifs_parse_reparse_point,
+ .get_reparse_point_buffer = cifs_get_reparse_point_buffer,
.open = cifs_open_file,
.set_fid = cifs_set_fid,
.close = cifs_close_file,
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 41d8cd20b25f..2fe8eeb98535 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4555,9 +4555,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
return rc;
}
} else {
- if (unlikely(!server->secmech.dec))
- return -EIO;
-
+ rc = smb3_crypto_aead_allocate(server);
+ if (unlikely(rc))
+ return rc;
tfm = server->secmech.dec;
}
@@ -5303,7 +5303,7 @@ struct smb_version_operations smb20_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .parse_reparse_point = smb2_parse_reparse_point,
+ .get_reparse_point_buffer = smb2_get_reparse_point_buffer,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.create_reparse_symlink = smb2_create_reparse_symlink,
@@ -5406,7 +5406,7 @@ struct smb_version_operations smb21_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .parse_reparse_point = smb2_parse_reparse_point,
+ .get_reparse_point_buffer = smb2_get_reparse_point_buffer,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.create_reparse_symlink = smb2_create_reparse_symlink,
@@ -5513,7 +5513,7 @@ struct smb_version_operations smb30_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .parse_reparse_point = smb2_parse_reparse_point,
+ .get_reparse_point_buffer = smb2_get_reparse_point_buffer,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.create_reparse_symlink = smb2_create_reparse_symlink,
@@ -5629,7 +5629,7 @@ struct smb_version_operations smb311_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .parse_reparse_point = smb2_parse_reparse_point,
+ .get_reparse_point_buffer = smb2_get_reparse_point_buffer,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.create_reparse_symlink = smb2_create_reparse_symlink,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 81e05db8e4d5..c4d52bebd37d 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1252,15 +1252,8 @@ SMB2_negotiate(const unsigned int xid,
cifs_server_dbg(VFS, "Missing expected negotiate contexts\n");
}
- if (server->cipher_type && !rc) {
- if (!SERVER_IS_CHAN(server)) {
- rc = smb3_crypto_aead_allocate(server);
- } else {
- /* For channels, just reuse the primary server crypto secmech. */
- server->secmech.enc = server->primary_server->secmech.enc;
- server->secmech.dec = server->primary_server->secmech.dec;
- }
- }
+ if (server->cipher_type && !rc)
+ rc = smb3_crypto_aead_allocate(server);
neg_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 764dca80c15c..f79a5165a7cc 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1567,13 +1567,13 @@ struct reparse_nfs_data_buffer {
__u8 DataBuffer[];
} __packed;
-/* For IO_REPARSE_TAG_LX_SYMLINK */
+/* For IO_REPARSE_TAG_LX_SYMLINK - see MS-FSCC 2.1.2.7 */
struct reparse_wsl_symlink_data_buffer {
__le32 ReparseTag;
__le16 ReparseDataLength;
__u16 Reserved;
- __le32 Flags;
- __u8 PathBuffer[]; /* Variable Length UTF-8 string without nul-term */
+ __le32 Version; /* Always 2 */
+ __u8 Target[]; /* Variable Length UTF-8 string without nul-term */
} __packed;
struct validate_negotiate_info_req {
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index c1f22c129111..83764c230e9d 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -39,8 +39,10 @@ void ksmbd_conn_free(struct ksmbd_conn *conn)
xa_destroy(&conn->sessions);
kvfree(conn->request_buf);
kfree(conn->preauth_info);
- if (atomic_dec_and_test(&conn->refcnt))
+ if (atomic_dec_and_test(&conn->refcnt)) {
+ ksmbd_free_transport(conn->transport);
kfree(conn);
+ }
}
/**
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index f103b1bd0400..81a29857b1e3 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -129,14 +129,6 @@ static void free_opinfo(struct oplock_info *opinfo)
kfree(opinfo);
}
-static inline void opinfo_free_rcu(struct rcu_head *rcu_head)
-{
- struct oplock_info *opinfo;
-
- opinfo = container_of(rcu_head, struct oplock_info, rcu_head);
- free_opinfo(opinfo);
-}
-
struct oplock_info *opinfo_get(struct ksmbd_file *fp)
{
struct oplock_info *opinfo;
@@ -157,8 +149,8 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
if (list_empty(&ci->m_op_list))
return NULL;
- rcu_read_lock();
- opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
+ down_read(&ci->m_lock);
+ opinfo = list_first_entry(&ci->m_op_list, struct oplock_info,
op_entry);
if (opinfo) {
if (opinfo->conn == NULL ||
@@ -171,8 +163,7 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
}
}
}
-
- rcu_read_unlock();
+ up_read(&ci->m_lock);
return opinfo;
}
@@ -185,7 +176,7 @@ void opinfo_put(struct oplock_info *opinfo)
if (!atomic_dec_and_test(&opinfo->refcount))
return;
- call_rcu(&opinfo->rcu_head, opinfo_free_rcu);
+ free_opinfo(opinfo);
}
static void opinfo_add(struct oplock_info *opinfo)
@@ -193,7 +184,7 @@ static void opinfo_add(struct oplock_info *opinfo)
struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
down_write(&ci->m_lock);
- list_add_rcu(&opinfo->op_entry, &ci->m_op_list);
+ list_add(&opinfo->op_entry, &ci->m_op_list);
up_write(&ci->m_lock);
}
@@ -207,7 +198,7 @@ static void opinfo_del(struct oplock_info *opinfo)
write_unlock(&lease_list_lock);
}
down_write(&ci->m_lock);
- list_del_rcu(&opinfo->op_entry);
+ list_del(&opinfo->op_entry);
up_write(&ci->m_lock);
}
@@ -1347,8 +1338,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
ci = fp->f_ci;
op = opinfo_get(fp);
- rcu_read_lock();
- list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
+ down_read(&ci->m_lock);
+ list_for_each_entry(brk_op, &ci->m_op_list, op_entry) {
if (brk_op->conn == NULL)
continue;
@@ -1358,7 +1349,6 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
if (ksmbd_conn_releasing(brk_op->conn))
continue;
- rcu_read_unlock();
if (brk_op->is_lease && (brk_op->o_lease->state &
(~(SMB2_LEASE_READ_CACHING_LE |
SMB2_LEASE_HANDLE_CACHING_LE)))) {
@@ -1388,9 +1378,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL);
next:
opinfo_put(brk_op);
- rcu_read_lock();
}
- rcu_read_unlock();
+ up_read(&ci->m_lock);
if (op)
opinfo_put(op);
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 3f64f0787263..9a56eaadd0dd 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -71,7 +71,6 @@ struct oplock_info {
struct list_head lease_entry;
wait_queue_head_t oplock_q; /* Other server threads */
wait_queue_head_t oplock_brk; /* oplock breaking wait */
- struct rcu_head rcu_head;
};
struct lease_break_info {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d24d95d15d87..57839f9708bb 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1602,8 +1602,10 @@ static int krb5_authenticate(struct ksmbd_work *work,
if (prev_sess_id && prev_sess_id != sess->id)
destroy_previous_session(conn, sess->user, prev_sess_id);
- if (sess->state == SMB2_SESSION_VALID)
+ if (sess->state == SMB2_SESSION_VALID) {
ksmbd_free_user(sess->user);
+ sess->user = NULL;
+ }
retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
out_blob, &out_len);
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index a3d8a905b07e..d742ba754348 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -72,6 +72,8 @@
#define FILE_SUPPORTS_ENCRYPTION 0x00020000
#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400
+#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200
#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 3f185ae60dc5..2a3e2b0ce557 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -310,7 +310,11 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
server_conf.signing = req->signing;
server_conf.tcp_port = req->tcp_port;
server_conf.ipc_timeout = req->ipc_timeout * HZ;
- server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL;
+ if (check_mul_overflow(req->deadtime, SMB_ECHO_INTERVAL,
+ &server_conf.deadtime)) {
+ ret = -EINVAL;
+ goto out;
+ }
server_conf.share_fake_fscaps = req->share_fake_fscaps;
ksmbd_init_domain(req->sub_auth);
@@ -337,6 +341,7 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
server_conf.bind_interfaces_only = req->bind_interfaces_only;
ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req),
req->ifc_list_sz);
+out:
if (ret) {
pr_err("Server configuration error: %s %s %s\n",
req->netbios_name, req->server_string,
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 7f38a3c3f5bd..abedf510899a 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -93,17 +93,21 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk)
return t;
}
-static void free_transport(struct tcp_transport *t)
+void ksmbd_free_transport(struct ksmbd_transport *kt)
{
- kernel_sock_shutdown(t->sock, SHUT_RDWR);
- sock_release(t->sock);
- t->sock = NULL;
+ struct tcp_transport *t = TCP_TRANS(kt);
- ksmbd_conn_free(KSMBD_TRANS(t)->conn);
+ sock_release(t->sock);
kfree(t->iov);
kfree(t);
}
+static void free_transport(struct tcp_transport *t)
+{
+ kernel_sock_shutdown(t->sock, SHUT_RDWR);
+ ksmbd_conn_free(KSMBD_TRANS(t)->conn);
+}
+
/**
* kvec_array_init() - initialize a IO vector segment
* @new: IO vector to be initialized
diff --git a/fs/smb/server/transport_tcp.h b/fs/smb/server/transport_tcp.h
index 8c9aa624cfe3..1e51675ee1b2 100644
--- a/fs/smb/server/transport_tcp.h
+++ b/fs/smb/server/transport_tcp.h
@@ -8,6 +8,7 @@
int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz);
struct interface *ksmbd_find_netdev_name_iface_list(char *netdev_name);
+void ksmbd_free_transport(struct ksmbd_transport *kt);
int ksmbd_tcp_init(void);
void ksmbd_tcp_destroy(void);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 8554aa5a1059..391d07da586c 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -479,7 +479,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
int err = 0;
if (work->conn->connection_type) {
- if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
+ if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) ||
+ S_ISDIR(file_inode(fp->filp)->i_mode)) {
pr_err("no right to write(%pD)\n", fp->filp);
err = -EACCES;
goto out;
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 8d1f30dcba7e..1f8fa3468173 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -713,12 +713,8 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
static bool ksmbd_durable_scavenger_alive(void)
{
- mutex_lock(&durable_scavenger_lock);
- if (!durable_scavenger_running) {
- mutex_unlock(&durable_scavenger_lock);
+ if (!durable_scavenger_running)
return false;
- }
- mutex_unlock(&durable_scavenger_lock);
if (kthread_should_stop())
return false;
@@ -799,9 +795,7 @@ static int ksmbd_durable_scavenger(void *dummy)
break;
}
- mutex_lock(&durable_scavenger_lock);
durable_scavenger_running = false;
- mutex_unlock(&durable_scavenger_lock);
module_put(THIS_MODULE);
diff --git a/fs/stat.c b/fs/stat.c
index f13308bfdc98..3d9222807214 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -204,12 +204,25 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
STATX_ATTR_DAX);
idmap = mnt_idmap(path->mnt);
- if (inode->i_op->getattr)
- return inode->i_op->getattr(idmap, path, stat,
- request_mask,
- query_flags);
+ if (inode->i_op->getattr) {
+ int ret;
+
+ ret = inode->i_op->getattr(idmap, path, stat, request_mask,
+ query_flags);
+ if (ret)
+ return ret;
+ } else {
+ generic_fillattr(idmap, request_mask, inode, stat);
+ }
+
+ /*
+ * If this is a block device inode, override the filesystem attributes
+ * with the block device specific parameters that need to be obtained
+ * from the bdev backing inode.
+ */
+ if (S_ISBLK(stat->mode))
+ bdev_statx(path, stat, request_mask);
- generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
EXPORT_SYMBOL(vfs_getattr_nosec);
@@ -295,15 +308,6 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
if (path_mounted(path))
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
-
- /*
- * If this is a block device inode, override the filesystem
- * attributes with the block device specific parameters that need to be
- * obtained from the bdev backing inode.
- */
- if (S_ISBLK(stat->mode))
- bdev_statx(path, stat, request_mask);
-
return 0;
}
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index fffd6fffdce0..ae0ca6858496 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -3,7 +3,7 @@ config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
select EXPORTFS
- select LIBCRC32C
+ select CRC32
select FS_IOMAP
help
XFS is a high performance journaling filesystem which originated
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8e7f1b324b3b..1a2b3f06fa71 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -105,6 +105,7 @@ xfs_buf_free(
{
unsigned int size = BBTOB(bp->b_length);
+ might_sleep();
trace_xfs_buf_free(bp, _RET_IP_);
ASSERT(list_empty(&bp->b_lru));
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index b4ffd80b7cb6..dcbfa274e06d 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -165,7 +165,7 @@ xmbuf_map_backing_mem(
folio_set_dirty(folio);
folio_unlock(folio);
- bp->b_addr = folio_address(folio);
+ bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos);
return 0;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index edbc521870a1..b4e32f0860b7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1186,9 +1186,8 @@ xfs_qm_dqflush_done(
if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
(lip->li_lsn == qlip->qli_flush_lsn ||
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
-
spin_lock(&ailp->ail_lock);
- xfs_clear_li_failed(lip);
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
if (lip->li_lsn == qlip->qli_flush_lsn) {
/* xfs_ail_update_finish() drops the AIL lock */
tail_lsn = xfs_ail_delete_one(ailp, lip);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index a4bc1642fe56..414b27a86458 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -876,6 +876,7 @@ xfs_getfsmap_rtdev_rmapbt(
const struct xfs_fsmap *keys,
struct xfs_getfsmap_info *info)
{
+ struct xfs_fsmap key0 = *keys; /* struct copy */
struct xfs_mount *mp = tp->t_mountp;
struct xfs_rtgroup *rtg = NULL;
struct xfs_btree_cur *bt_cur = NULL;
@@ -887,32 +888,46 @@ xfs_getfsmap_rtdev_rmapbt(
int error = 0;
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart + mp->m_sb.sb_rblocks);
- if (keys[0].fmr_physical >= eofs)
+ if (key0.fmr_physical >= eofs)
return 0;
+ /*
+ * On zoned filesystems with an internal rt volume, the volume comes
+ * immediately after the end of the data volume. However, the
+ * xfs_rtblock_t address space is relative to the start of the data
+ * device, which means that the first @rtstart fsblocks do not actually
+ * point anywhere. If a fsmap query comes in with the low key starting
+ * below @rtstart, report it as "owned by filesystem".
+ */
rtstart_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart);
- if (keys[0].fmr_physical < rtstart_daddr) {
+ if (xfs_has_zoned(mp) && key0.fmr_physical < rtstart_daddr) {
struct xfs_fsmap_irec frec = {
.owner = XFS_RMAP_OWN_FS,
.len_daddr = rtstart_daddr,
};
- /* Adjust the low key if we are continuing from where we left off. */
- if (keys[0].fmr_length > 0) {
- info->low_daddr = keys[0].fmr_physical + keys[0].fmr_length;
- return 0;
+ /*
+ * Adjust the start of the query range if we're picking up from
+ * a previous round, and only emit the record if we haven't
+ * already gone past.
+ */
+ key0.fmr_physical += key0.fmr_length;
+ if (key0.fmr_physical < rtstart_daddr) {
+ error = xfs_getfsmap_helper(tp, info, &frec);
+ if (error)
+ return error;
+
+ key0.fmr_physical = rtstart_daddr;
}
- /* Fabricate an rmap entry for space occupied by the data dev */
- error = xfs_getfsmap_helper(tp, info, &frec);
- if (error)
- return error;
+ /* Zero the other fields to avoid further adjustments. */
+ key0.fmr_owner = 0;
+ key0.fmr_offset = 0;
+ key0.fmr_length = 0;
}
- start_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + keys[0].fmr_physical);
- end_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr +
- min(eofs - 1, keys[1].fmr_physical));
-
+ start_rtb = xfs_daddr_to_rtb(mp, key0.fmr_physical);
+ end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
info->missing_owner = XFS_FMR_OWN_FREE;
/*
@@ -920,12 +935,12 @@ xfs_getfsmap_rtdev_rmapbt(
* low to the fsmap low key and max out the high key to the end
* of the rtgroup.
*/
- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
- error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, key0.fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->low, &key0);
if (error)
return error;
- info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+ info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, key0.fmr_length);
+ xfs_getfsmap_set_irec_flags(&info->low, &key0);
/* Adjust the low key if we are continuing from where we left off. */
if (info->low.rm_blockcount == 0) {
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 40fc1bf900af..c6cb0b6b9e46 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -1089,13 +1089,7 @@ xfs_iflush_abort(
* state. Whilst the inode is in the AIL, it should have a valid buffer
* pointer for push operations to access - it is only safe to remove the
* inode from the buffer once it has been removed from the AIL.
- *
- * We also clear the failed bit before removing the item from the AIL
- * as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer
- * references the inode item owns and needs to hold until we've fully
- * aborted the inode log item and detached it from the buffer.
*/
- clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
xfs_trans_ail_delete(&iip->ili_item, 0);
/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6493bdb57351..980aabc49512 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2888,7 +2888,7 @@ xlog_force_and_check_iclog(
*
* 1. the current iclog is active and has no data; the previous iclog
* is in the active or dirty state.
- * 2. the current iclog is drity, and the previous iclog is in the
+ * 2. the current iclog is dirty, and the previous iclog is in the
* active or dirty state.
*
* We may sleep if:
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 799b84220ebb..e5192c12e7ac 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -229,6 +229,7 @@ typedef struct xfs_mount {
bool m_finobt_nores; /* no per-AG finobt resv. */
bool m_update_sb; /* sb needs update in mount */
unsigned int m_max_open_zones;
+ unsigned int m_zonegc_low_space;
/*
* Bitsets of per-fs metadata that have been checked and/or are sick.
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index b7e82d85f043..7a5c5ef2db92 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -718,8 +718,40 @@ max_open_zones_show(
}
XFS_SYSFS_ATTR_RO(max_open_zones);
+static ssize_t
+zonegc_low_space_store(
+ struct kobject *kobj,
+ const char *buf,
+ size_t count)
+{
+ int ret;
+ unsigned int val;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ zoned_to_mp(kobj)->m_zonegc_low_space = val;
+
+ return count;
+}
+
+static ssize_t
+zonegc_low_space_show(
+ struct kobject *kobj,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n",
+ zoned_to_mp(kobj)->m_zonegc_low_space);
+}
+XFS_SYSFS_ATTR_RW(zonegc_low_space);
+
static struct attribute *xfs_zoned_attrs[] = {
ATTR_LIST(max_open_zones),
+ ATTR_LIST(zonegc_low_space),
NULL,
};
ATTRIBUTE_GROUPS(xfs_zoned);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 0fcb1828e598..85a649fec6ac 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -909,10 +909,9 @@ xfs_trans_ail_delete(
return;
}
- /* xfs_ail_update_finish() drops the AIL lock */
- xfs_clear_li_failed(lip);
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
tail_lsn = xfs_ail_delete_one(ailp, lip);
- xfs_ail_update_finish(ailp, tail_lsn);
+ xfs_ail_update_finish(ailp, tail_lsn); /* drops the AIL lock */
}
int
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index bd841df93021..f945f0450b16 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -167,32 +167,4 @@ xfs_trans_ail_copy_lsn(
}
#endif
-static inline void
-xfs_clear_li_failed(
- struct xfs_log_item *lip)
-{
- struct xfs_buf *bp = lip->li_buf;
-
- ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
- lockdep_assert_held(&lip->li_ailp->ail_lock);
-
- if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) {
- lip->li_buf = NULL;
- xfs_buf_rele(bp);
- }
-}
-
-static inline void
-xfs_set_li_failed(
- struct xfs_log_item *lip,
- struct xfs_buf *bp)
-{
- lockdep_assert_held(&lip->li_ailp->ail_lock);
-
- if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) {
- xfs_buf_hold(bp);
- lip->li_buf = bp;
- }
-}
-
#endif /* __XFS_TRANS_PRIV_H__ */
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 52af234936a2..d509e49b2aaa 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -1201,6 +1201,13 @@ xfs_mount_zones(
xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
iz.available + iz.reclaimable);
+ /*
+ * The user may configure GC to free up a percentage of unused blocks.
+ * By default this is 0. GC will always trigger at the minimum level
+ * for keeping max_open_zones available for data placement.
+ */
+ mp->m_zonegc_low_space = 0;
+
error = xfs_zone_gc_mount(mp);
if (error)
goto out_free_zone_info;
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index c5136ea9bb1d..8c541ca71872 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -162,18 +162,30 @@ struct xfs_zone_gc_data {
/*
* We aim to keep enough zones free in stock to fully use the open zone limit
- * for data placement purposes.
+ * for data placement purposes. Additionally, the m_zonegc_low_space tunable
+ * can be set to make sure a fraction of the unused blocks are available for
+ * writing.
*/
bool
xfs_zoned_need_gc(
struct xfs_mount *mp)
{
+ s64 available, free;
+
if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE))
return false;
- if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) <
+
+ available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);
+
+ if (available <
mp->m_groups[XG_TYPE_RTG].blocks *
(mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
return true;
+
+ free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
+ if (available < mult_frac(free, mp->m_zonegc_low_space, 100))
+ return true;
+
return false;
}