summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c5
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/fsclient.c4
-rw-r--r--fs/afs/vlclient.c1
-rw-r--r--fs/cifs/cifs_ioctl.h25
-rw-r--r--fs/cifs/cifspdu.h3
-rw-r--r--fs/cifs/ioctl.c143
-rw-r--r--fs/cifs/smb2pdu.c7
-rw-r--r--fs/cifs/trace.h29
-rw-r--r--fs/debugfs/inode.c9
-rw-r--r--fs/gfs2/file.c9
-rw-r--r--fs/gfs2/glock.c28
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/log.c6
-rw-r--r--fs/gfs2/log.h1
-rw-r--r--fs/gfs2/lops.c7
-rw-r--r--fs/gfs2/lops.h1
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/io-wq.c29
-rw-r--r--fs/io-wq.h2
-rw-r--r--fs/io_uring.c15
-rw-r--r--fs/netfs/Kconfig2
-rw-r--r--fs/netfs/read_helper.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/pagelist.c20
-rw-r--r--fs/nfs/pnfs.c17
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c30
-rw-r--r--fs/notify/fdinfo.c2
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c18
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c12
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c46
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c17
-rw-r--r--fs/xfs/xfs_inode.c29
-rw-r--r--fs/xfs/xfs_ioctl.c101
-rw-r--r--fs/xfs/xfs_message.h2
40 files changed, 432 insertions, 213 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a4e9e6e07e93..d3c6bb22c5f4 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -322,6 +322,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
return ret;
call->unmarshall++;
+ fallthrough;
+
case 5:
break;
}
@@ -418,6 +420,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
r->node[loop] = ntohl(b[loop + 5]);
call->unmarshall++;
+ fallthrough;
case 2:
break;
@@ -530,6 +533,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
r->node[loop] = ntohl(b[loop + 5]);
call->unmarshall++;
+ fallthrough;
case 2:
break;
@@ -663,6 +667,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
+ fallthrough;
case 3:
break;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9fbe5a5ec9bd..78719f2f567e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1919,7 +1919,9 @@ static void afs_rename_edit_dir(struct afs_operation *op)
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
- if (new_inode->i_nlink > 0)
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else if (new_inode->i_nlink > 0)
drop_nlink(new_inode);
spin_unlock(&new_inode->i_lock);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f695a260442..dd3f45d906d2 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -388,6 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
req->file_size = vp->scb.status.size;
call->unmarshall++;
+ fallthrough;
case 5:
break;
@@ -1408,6 +1409,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
_debug("motd '%s'", p);
call->unmarshall++;
+ fallthrough;
case 8:
break;
@@ -1845,6 +1847,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
xdr_decode_AFSVolSync(&bp, &op->volsync);
call->unmarshall++;
+ fallthrough;
case 6:
break;
@@ -1979,6 +1982,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
xdr_decode_AFSVolSync(&bp, &op->volsync);
call->unmarshall++;
+ fallthrough;
case 4:
break;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dc9327332f06..00fca3c66ba6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -593,6 +593,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (ret < 0)
return ret;
call->unmarshall = 6;
+ fallthrough;
case 6:
break;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 4a97fe12006b..37fc7d6ac457 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -72,15 +72,28 @@ struct smb3_key_debug_info {
} __packed;
/*
- * Dump full key (32 byte encrypt/decrypt keys instead of 16 bytes)
- * is needed if GCM256 (stronger encryption) negotiated
+ * Dump variable-sized keys
*/
struct smb3_full_key_debug_info {
- __u64 Suid;
+ /* INPUT: size of userspace buffer */
+ __u32 in_size;
+
+ /*
+ * INPUT: 0 for current user, otherwise session to dump
+ * OUTPUT: session id that was dumped
+ */
+ __u64 session_id;
__u16 cipher_type;
- __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
- __u8 smb3encryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
- __u8 smb3decryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
+ __u8 session_key_length;
+ __u8 server_in_key_length;
+ __u8 server_out_key_length;
+ __u8 data[];
+ /*
+ * return this struct with the keys appended at the end:
+ * __u8 session_key[session_key_length];
+ * __u8 server_in_key[server_in_key_length];
+ * __u8 server_out_key[server_out_key_length];
+ */
} __packed;
struct smb3_notify {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b53a87db282f..554d64fe171e 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -148,7 +148,8 @@
#define SMB3_SIGN_KEY_SIZE (16)
/*
- * Size of the smb3 encryption/decryption keys
+ * Size of the smb3 encryption/decryption key storage.
+ * This size is big enough to store any cipher key types.
*/
#define SMB3_ENC_DEC_KEY_SIZE (32)
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 28ec8d7c521a..d67d281ab863 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -33,6 +33,7 @@
#include "cifsfs.h"
#include "cifs_ioctl.h"
#include "smb2proto.h"
+#include "smb2glob.h"
#include <linux/btrfs.h>
static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
@@ -214,48 +215,112 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
return 0;
}
-static int cifs_dump_full_key(struct cifs_tcon *tcon, unsigned long arg)
+static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in)
{
- struct smb3_full_key_debug_info pfull_key_inf;
- __u64 suid;
- struct list_head *tmp;
+ struct smb3_full_key_debug_info out;
struct cifs_ses *ses;
+ int rc = 0;
bool found = false;
+ u8 __user *end;
- if (!smb3_encryption_required(tcon))
- return -EOPNOTSUPP;
+ if (!smb3_encryption_required(tcon)) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* copy user input into our output buffer */
+ if (copy_from_user(&out, in, sizeof(out))) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (!out.session_id) {
+ /* if ses id is 0, use current user session */
+ ses = tcon->ses;
+ } else {
+ /* otherwise if a session id is given, look for it in all our sessions */
+ struct cifs_ses *ses_it = NULL;
+ struct TCP_Server_Info *server_it = NULL;
- ses = tcon->ses; /* default to user id for current user */
- if (get_user(suid, (__u64 __user *)arg))
- suid = 0;
- if (suid) {
- /* search to see if there is a session with a matching SMB UID */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &tcon->ses->server->smb_ses_list) {
- ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
- if (ses->Suid == suid) {
- found = true;
- break;
+ list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
+ list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
+ if (ses_it->Suid == out.session_id) {
+ ses = ses_it;
+ /*
+ * since we are using the session outside the crit
+ * section, we need to make sure it won't be released
+ * so increment its refcount
+ */
+ ses->ses_count++;
+ found = true;
+ goto search_end;
+ }
}
}
+search_end:
spin_unlock(&cifs_tcp_ses_lock);
- if (found == false)
- return -EINVAL;
- } /* else uses default user's SMB UID (ie current user) */
-
- pfull_key_inf.cipher_type = le16_to_cpu(ses->server->cipher_type);
- pfull_key_inf.Suid = ses->Suid;
- memcpy(pfull_key_inf.auth_key, ses->auth_key.response,
- 16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
- memcpy(pfull_key_inf.smb3decryptionkey, ses->smb3decryptionkey,
- 32 /* SMB3_ENC_DEC_KEY_SIZE */);
- memcpy(pfull_key_inf.smb3encryptionkey,
- ses->smb3encryptionkey, 32 /* SMB3_ENC_DEC_KEY_SIZE */);
- if (copy_to_user((void __user *)arg, &pfull_key_inf,
- sizeof(struct smb3_full_key_debug_info)))
- return -EFAULT;
+ if (!found) {
+ rc = -ENOENT;
+ goto out;
+ }
+ }
- return 0;
+ switch (ses->server->cipher_type) {
+ case SMB2_ENCRYPTION_AES128_CCM:
+ case SMB2_ENCRYPTION_AES128_GCM:
+ out.session_key_length = CIFS_SESS_KEY_SIZE;
+ out.server_in_key_length = out.server_out_key_length = SMB3_GCM128_CRYPTKEY_SIZE;
+ break;
+ case SMB2_ENCRYPTION_AES256_CCM:
+ case SMB2_ENCRYPTION_AES256_GCM:
+ out.session_key_length = CIFS_SESS_KEY_SIZE;
+ out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* check if user buffer is big enough to store all the keys */
+ if (out.in_size < sizeof(out) + out.session_key_length + out.server_in_key_length
+ + out.server_out_key_length) {
+ rc = -ENOBUFS;
+ goto out;
+ }
+
+ out.session_id = ses->Suid;
+ out.cipher_type = le16_to_cpu(ses->server->cipher_type);
+
+ /* overwrite user input with our output */
+ if (copy_to_user(in, &out, sizeof(out))) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* append all the keys at the end of the user buffer */
+ end = in->data;
+ if (copy_to_user(end, ses->auth_key.response, out.session_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ end += out.session_key_length;
+
+ if (copy_to_user(end, ses->smb3encryptionkey, out.server_in_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ end += out.server_in_key_length;
+
+ if (copy_to_user(end, ses->smb3decryptionkey, out.server_out_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+out:
+ if (found)
+ cifs_put_smb_ses(ses);
+ return rc;
}
long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
@@ -371,6 +436,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EOPNOTSUPP;
break;
case CIFS_DUMP_KEY:
+ /*
+ * Dump encryption keys. This is an old ioctl that only
+ * handles AES-128-{CCM,GCM}.
+ */
if (pSMBFile == NULL)
break;
if (!capable(CAP_SYS_ADMIN)) {
@@ -398,11 +467,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
else
rc = 0;
break;
- /*
- * Dump full key (32 bytes instead of 16 bytes) is
- * needed if GCM256 (stronger encryption) negotiated
- */
case CIFS_DUMP_FULL_KEY:
+ /*
+ * Dump encryption keys (handles any key sizes)
+ */
if (pSMBFile == NULL)
break;
if (!capable(CAP_SYS_ADMIN)) {
@@ -410,8 +478,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
}
tcon = tlink_tcon(pSMBFile->tlink);
- rc = cifs_dump_full_key(tcon, arg);
-
+ rc = cifs_dump_full_key(tcon, (void __user *)arg);
break;
case CIFS_IOC_NOTIFY:
if (!S_ISDIR(inode->i_mode)) {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 9f24eb88297a..c205f93e0a10 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -958,6 +958,13 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
/* Internal types */
server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
+ /*
+ * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context
+ * Set the cipher type manually.
+ */
+ if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ server->cipher_type = SMB2_ENCRYPTION_AES128_CCM;
+
security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
(struct smb2_sync_hdr *)rsp);
/*
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index d6df908dccad..dafcb6ab050d 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -12,6 +12,11 @@
#include <linux/tracepoint.h>
+/*
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
+ */
+
/* For logging errors in read or write */
DECLARE_EVENT_CLASS(smb3_rw_err_class,
TP_PROTO(unsigned int xid,
@@ -529,16 +534,16 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
TP_ARGS(xid, func_name, rc),
TP_STRUCT__entry(
__field(unsigned int, xid)
- __field(const char *, func_name)
+ __string(func_name, func_name)
__field(int, rc)
),
TP_fast_assign(
__entry->xid = xid;
- __entry->func_name = func_name;
+ __assign_str(func_name, func_name);
__entry->rc = rc;
),
TP_printk("\t%s: xid=%u rc=%d",
- __entry->func_name, __entry->xid, __entry->rc)
+ __get_str(func_name), __entry->xid, __entry->rc)
)
#define DEFINE_SMB3_EXIT_ERR_EVENT(name) \
@@ -583,14 +588,14 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
TP_ARGS(xid, func_name),
TP_STRUCT__entry(
__field(unsigned int, xid)
- __field(const char *, func_name)
+ __string(func_name, func_name)
),
TP_fast_assign(
__entry->xid = xid;
- __entry->func_name = func_name;
+ __assign_str(func_name, func_name);
),
TP_printk("\t%s: xid=%u",
- __entry->func_name, __entry->xid)
+ __get_str(func_name), __entry->xid)
)
#define DEFINE_SMB3_ENTER_EXIT_EVENT(name) \
@@ -857,16 +862,16 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_STRUCT__entry(
__field(__u64, currmid)
__field(__u64, conn_id)
- __field(char *, hostname)
+ __string(hostname, hostname)
),
TP_fast_assign(
__entry->currmid = currmid;
__entry->conn_id = conn_id;
- __entry->hostname = hostname;
+ __assign_str(hostname, hostname);
),
TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
__entry->conn_id,
- __entry->hostname,
+ __get_str(hostname),
__entry->currmid)
)
@@ -891,7 +896,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_STRUCT__entry(
__field(__u64, currmid)
__field(__u64, conn_id)
- __field(char *, hostname)
+ __string(hostname, hostname)
__field(int, credits)
__field(int, credits_to_add)
__field(int, in_flight)
@@ -899,7 +904,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_fast_assign(
__entry->currmid = currmid;
__entry->conn_id = conn_id;
- __entry->hostname = hostname;
+ __assign_str(hostname, hostname);
__entry->credits = credits;
__entry->credits_to_add = credits_to_add;
__entry->in_flight = in_flight;
@@ -907,7 +912,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
"credits=%d credit_change=%d in_flight=%d",
__entry->conn_id,
- __entry->hostname,
+ __get_str(hostname),
__entry->currmid,
__entry->credits,
__entry->credits_to_add,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 1d252164d97b..8129a430d789 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -45,10 +45,13 @@ static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;
static int debugfs_setattr(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *ia)
{
- int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+ int ret;
- if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
- return ret;
+ if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
+ ret = security_locked_down(LOCKDOWN_DEBUGFS);
+ if (ret)
+ return ret;
+ }
return simple_setattr(&init_user_ns, dentry, ia);
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a0b542d84cd9..8a35a0196b6d 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -540,9 +540,11 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
vm_fault_t ret;
+ u16 state;
int err;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ state = (vmf->flags & FAULT_FLAG_WRITE) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+ gfs2_holder_init(ip->i_gl, state, 0, &gh);
err = gfs2_glock_nq(&gh);
if (err) {
ret = block_page_mkwrite_return(err);
@@ -911,8 +913,11 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
current->backing_dev_info = inode_to_bdi(inode);
buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
current->backing_dev_info = NULL;
- if (unlikely(buffered <= 0))
+ if (unlikely(buffered <= 0)) {
+ if (!ret)
+ ret = buffered;
goto out_unlock;
+ }
/*
* We need to ensure that the page cache pages are written to
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ea7fc5c641c7..d9cb261f55b0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -582,6 +582,16 @@ out_locked:
spin_unlock(&gl->gl_lockref.lock);
}
+static bool is_system_glock(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+ if (gl == m_ip->i_gl)
+ return true;
+ return false;
+}
+
/**
* do_xmote - Calls the DLM to change the state of a lock
* @gl: The lock state
@@ -671,17 +681,25 @@ skip_inval:
* to see sd_log_error and withdraw, and in the meantime, requeue the
* work for later.
*
+ * We make a special exception for some system glocks, such as the
+ * system statfs inode glock, which needs to be granted before the
+ * gfs2_quotad daemon can exit, and that exit needs to finish before
+ * we can unmount the withdrawn file system.
+ *
* However, if we're just unlocking the lock (say, for unmount, when
* gfs2_gl_hash_clear calls clear_glock) and recovery is complete
* then it's okay to tell dlm to unlock it.
*/
if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
gfs2_withdraw_delayed(sdp);
- if (glock_blocked_by_withdraw(gl)) {
- if (target != LM_ST_UNLOCKED ||
- test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) {
+ if (glock_blocked_by_withdraw(gl) &&
+ (target != LM_ST_UNLOCKED ||
+ test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
+ if (!is_system_glock(gl)) {
gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
goto out;
+ } else {
+ clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
}
}
@@ -1466,9 +1484,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
glock_blocked_by_withdraw(gl) &&
gh->gh_gl != sdp->sd_jinode_gl) {
sdp->sd_glock_dqs_held++;
+ spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
+ spin_lock(&gl->gl_lockref.lock);
}
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1775,6 +1795,7 @@ __acquires(&lru_lock)
while(!list_empty(list)) {
gl = list_first_entry(list, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
+ clear_bit(GLF_LRU, &gl->gl_flags);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
@@ -1820,7 +1841,6 @@ static long gfs2_scan_glock_lru(int nr)
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
- clear_bit(GLF_LRU, &gl->gl_flags);
freed++;
continue;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 454095e9fedf..54d3fbeb3002 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -396,7 +396,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
struct timespec64 atime;
u16 height, depth;
umode_t mode = be32_to_cpu(str->di_mode);
- bool is_new = ip->i_inode.i_flags & I_NEW;
+ bool is_new = ip->i_inode.i_state & I_NEW;
if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
goto corrupt;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 97d54e581a7b..42c15cfc0821 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -926,10 +926,10 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
}
/**
- * ail_drain - drain the ail lists after a withdraw
+ * gfs2_ail_drain - drain the ail lists after a withdraw
* @sdp: Pointer to GFS2 superblock
*/
-static void ail_drain(struct gfs2_sbd *sdp)
+void gfs2_ail_drain(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr;
@@ -956,6 +956,7 @@ static void ail_drain(struct gfs2_sbd *sdp)
list_del(&tr->tr_list);
gfs2_trans_free(sdp, tr);
}
+ gfs2_drain_revokes(sdp);
spin_unlock(&sdp->sd_ail_lock);
}
@@ -1162,7 +1163,6 @@ out_withdraw:
if (tr && list_empty(&tr->tr_list))
list_add(&tr->tr_list, &sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
- ail_drain(sdp); /* frees all transactions */
tr = NULL;
goto out_end;
}
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index eea58015710e..fc905c2af53c 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -93,5 +93,6 @@ extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
+extern void gfs2_ail_drain(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 221e7118cc3b..8ee05d25dfa6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -885,7 +885,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_write_page(sdp, page);
}
-static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+void gfs2_drain_revokes(struct gfs2_sbd *sdp)
{
struct list_head *head = &sdp->sd_log_revokes;
struct gfs2_bufdata *bd;
@@ -900,6 +900,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
}
}
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+ gfs2_drain_revokes(sdp);
+}
+
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass)
{
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 31b6dd0d2e5d..f707601597dc 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -20,6 +20,7 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, bool keep_cache);
+extern void gfs2_drain_revokes(struct gfs2_sbd *sdp);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
return sdp->sd_ldptrs;
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 3e08027a6c81..f4325b44956d 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -131,6 +131,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
return;
+ gfs2_ail_drain(sdp); /* frees all transactions */
inode = sdp->sd_jdesc->jd_inode;
ip = GFS2_I(inode);
i_gl = ip->i_gl;
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5361a9b4b47b..b3e8624a37d0 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
return cwd->wqe->wq == data;
}
+void io_wq_exit_start(struct io_wq *wq)
+{
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
static void io_wq_exit_workers(struct io_wq *wq)
{
struct callback_head *cb;
int node;
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
if (!wq->task)
return;
@@ -1003,13 +1006,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
struct io_wqe *wqe = wq->wqes[node];
io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
- spin_lock_irq(&wq->hash->wait.lock);
- list_del_init(&wq->wqes[node]->wait.entry);
- spin_unlock_irq(&wq->hash->wait.lock);
}
rcu_read_unlock();
io_worker_ref_put(wq);
wait_for_completion(&wq->worker_done);
+
+ for_each_node(node) {
+ spin_lock_irq(&wq->hash->wait.lock);
+ list_del_init(&wq->wqes[node]->wait.entry);
+ spin_unlock_irq(&wq->hash->wait.lock);
+ }
put_task_struct(wq->task);
wq->task = NULL;
}
@@ -1020,8 +1026,6 @@ static void io_wq_destroy(struct io_wq *wq)
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- io_wq_exit_workers(wq);
-
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
struct io_cb_cancel_data match = {
@@ -1036,16 +1040,13 @@ static void io_wq_destroy(struct io_wq *wq)
kfree(wq);
}
-void io_wq_put(struct io_wq *wq)
-{
- if (refcount_dec_and_test(&wq->refs))
- io_wq_destroy(wq);
-}
-
void io_wq_put_and_exit(struct io_wq *wq)
{
+ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
io_wq_exit_workers(wq);
- io_wq_put(wq);
+ if (refcount_dec_and_test(&wq->refs))
+ io_wq_destroy(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 0e6d310999e8..af2df0680ee2 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -122,7 +122,7 @@ struct io_wq_data {
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-void io_wq_put(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f82954004f6..903458afd56c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9039,11 +9039,16 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
struct io_tctx_node *node;
unsigned long index;
- tctx->io_wq = NULL;
xa_for_each(&tctx->xa, index, node)
io_uring_del_task_file(index);
- if (wq)
+ if (wq) {
+ /*
+ * Must be after io_uring_del_task_file() (removes nodes under
+ * uring_lock) to avoid race with io_uring_try_cancel_iowq().
+ */
+ tctx->io_wq = NULL;
io_wq_put_and_exit(wq);
+ }
}
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
@@ -9078,6 +9083,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
if (!current->io_uring)
return;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
WARN_ON_ONCE(!sqd || sqd->thread != current);
atomic_inc(&tctx->in_idle);
@@ -9112,6 +9120,9 @@ void __io_uring_cancel(struct files_struct *files)
DEFINE_WAIT(wait);
s64 inflight;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
do {
diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index 578112713703..b4db21022cb4 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config NETFS_SUPPORT
- tristate "Support for network filesystem high-level I/O"
+ tristate
help
This option enables support for network filesystems, including
helpers for high-level buffered I/O, abstracting out read
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 193841d03de0..725614625ed4 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -1068,7 +1068,7 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
retry:
- page = grab_cache_page_write_begin(mapping, index, 0);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d158a500c25c..d2103852475f 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -718,7 +718,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (unlikely(!p))
goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++);
- if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
+ if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 93e60e921f92..bc0c698f3350 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -362,7 +362,7 @@ static const struct kernel_param_ops param_ops_nfs_timeout = {
.set = param_set_nfs_timeout,
.get = param_get_nfs_timeout,
};
-#define param_check_nfs_timeout(name, p) __param_check(name, p, int);
+#define param_check_nfs_timeout(name, p) __param_check(name, p, int)
module_param(nfs_mountpoint_expiry_timeout, nfs_timeout, 0644);
MODULE_PARM_DESC(nfs_mountpoint_expiry_timeout,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 57b3821d975a..a1e5c6b85ded 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -211,7 +211,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
case SEEK_HOLE:
case SEEK_DATA:
ret = nfs42_proc_llseek(filep, offset, whence);
- if (ret != -ENOTSUPP)
+ if (ret != -EOPNOTSUPP)
return ret;
fallthrough;
default:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87d04f2c9385..0cd965882232 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1706,7 +1706,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
- if (!signal_pending(current)) {
+ if (!fatal_signal_pending(current)) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
else
@@ -3487,7 +3487,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
write_sequnlock(&state->seqlock);
trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
status = -EINTR;
else
if (schedule_timeout(5*HZ) != 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6c20b28d9d7c..cf9cc62ec48e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1094,15 +1094,16 @@ nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *prev = NULL;
unsigned int size;
- if (mirror->pg_count != 0) {
- prev = nfs_list_entry(mirror->pg_list.prev);
- } else {
+ if (list_empty(&mirror->pg_list)) {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
if (desc->pg_error < 0)
return 0;
mirror->pg_base = req->wb_pgbase;
- }
+ mirror->pg_count = 0;
+ mirror->pg_recoalesce = 0;
+ } else
+ prev = nfs_list_entry(mirror->pg_list.prev);
if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
@@ -1127,18 +1128,13 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
if (!list_empty(&mirror->pg_list)) {
int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
- else
+ if (list_empty(&mirror->pg_list))
mirror->pg_bytes_written += mirror->pg_count;
}
- if (list_empty(&mirror->pg_list)) {
- mirror->pg_count = 0;
- mirror->pg_base = 0;
- }
}
static void
@@ -1227,10 +1223,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do {
list_splice_init(&mirror->pg_list, &head);
- mirror->pg_bytes_written -= mirror->pg_count;
- mirror->pg_count = 0;
- mirror->pg_base = 0;
- mirror->pg_recoalesce = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 03e0b34c4a64..2c01ee805306 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1317,6 +1317,11 @@ _pnfs_return_layout(struct inode *ino)
{
struct pnfs_layout_hdr *lo = NULL;
struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
LIST_HEAD(tmp_list);
const struct cred *cred;
nfs4_stateid stateid;
@@ -1344,16 +1349,10 @@ _pnfs_return_layout(struct inode *ino)
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
- if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
- struct pnfs_layout_range range = {
- .iomode = IOMODE_ANY,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
+ if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
- }
/* Don't send a LAYOUTRETURN if list was initially empty */
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
@@ -2678,7 +2677,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- u64 rd_size = req->wb_bytes;
+ u64 rd_size;
pnfs_generic_pg_check_layout(pgio);
pnfs_generic_pg_check_range(pgio, req);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 19a212f9725d..fe58525cfed4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1379,7 +1379,7 @@ static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
-#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
+#define param_check_portnr(name, p) __param_check(name, p, unsigned int)
module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
module_param_named(callback_nr_threads, nfs_callback_nr_threads, ushort, 0644);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 71fefb30e015..be5b6d2c01e7 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -424,11 +424,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
* events generated by the listener process itself, without disclosing
* the pids of other processes.
*/
- if (!capable(CAP_SYS_ADMIN) &&
+ if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
task_tgid(current) != event->pid)
metadata.pid = 0;
- if (path && path->mnt && path->dentry) {
+ /*
+ * For now, fid mode is required for an unprivileged listener and
+ * fid mode does not report fd in events. Keep this check anyway
+ * for safety in case fid mode requirement is relaxed in the future
+ * to allow unprivileged listener to get events with no fd and no fid.
+ */
+ if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
+ path && path->mnt && path->dentry) {
fd = create_fd(group, path, &f);
if (fd < 0)
return fd;
@@ -1040,6 +1047,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
int f_flags, fd;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
unsigned int class = flags & FANOTIFY_CLASS_BITS;
+ unsigned int internal_flags = 0;
pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags);
@@ -1053,6 +1061,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
*/
if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
return -EPERM;
+
+ /*
+ * Setting the internal flag FANOTIFY_UNPRIV on the group
+ * prevents setting mount/filesystem marks on this group and
+ * prevents reporting pid and open fd in events.
+ */
+ internal_flags |= FANOTIFY_UNPRIV;
}
#ifdef CONFIG_AUDITSYSCALL
@@ -1105,7 +1120,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group;
}
- group->fanotify_data.flags = flags;
+ group->fanotify_data.flags = flags | internal_flags;
group->memcg = get_mem_cgroup_from_mm(current->mm);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
@@ -1305,11 +1320,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
group = f.file->private_data;
/*
- * An unprivileged user is not allowed to watch a mount point nor
- * a filesystem.
+ * An unprivileged user is not allowed to setup mount nor filesystem
+ * marks. This also includes setting up such marks by a group that
+ * was initialized by an unprivileged user.
*/
ret = -EPERM;
- if (!capable(CAP_SYS_ADMIN) &&
+ if ((!capable(CAP_SYS_ADMIN) ||
+ FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
mark_type != FAN_MARK_INODE)
goto fput_and_out;
@@ -1460,6 +1477,7 @@ static int __init fanotify_user_setup(void)
max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS,
FANOTIFY_DEFAULT_MAX_USER_MARKS);
+ BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index a712b2aaa9ac..57f0d5d9f934 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -144,7 +144,7 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
struct fsnotify_group *group = f->private_data;
seq_printf(m, "fanotify flags:%x event-flags:%x\n",
- group->fanotify_data.flags,
+ group->fanotify_data.flags & FANOTIFY_INIT_FLAGS,
group->fanotify_data.f_flags);
show_fdinfo(m, f, fanotify_fdinfo);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3851bfcdba56..58bbf334265b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2703,6 +2703,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
void *page;
int rv;
+ /* A task may only write when it was the opener. */
+ if (file->f_cred != current_real_cred())
+ return -EPERM;
+
rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (!task) {
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e32a1833d523..bbfea8022a3b 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -325,10 +325,22 @@ out:
error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
if (error2)
return error2;
- ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
- xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
- pag->pagf_freeblks + pag->pagf_flcount);
+
+ /*
+ * If there isn't enough space in the AG to satisfy the
+ * reservation, let the caller know that there wasn't enough
+ * space. Callers are responsible for deciding what to do
+ * next, since (in theory) we can stumble along with
+ * insufficient reservation if data blocks are being freed to
+ * replenish the AG's free space.
+ */
+ if (!error &&
+ xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+ xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
+ pag->pagf_freeblks + pag->pagf_flcount)
+ error = -ENOSPC;
}
+
return error;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7e3b9b01431e..a3e0e6f672d6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents(
ASSERT(cur);
ASSERT(whichfork != XFS_COW_FORK);
- ASSERT(!xfs_need_iread_extents(ifp));
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
@@ -5350,7 +5349,6 @@ __xfs_bunmapi(
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t max_len;
- xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
bool done = false;
@@ -5442,16 +5440,6 @@ __xfs_bunmapi(
del = got;
wasdel = isnullstartblock(del.br_startblock);
- /*
- * Make sure we don't touch multiple AGF headers out of order
- * in a single transaction, as that could cause AB-BA deadlocks.
- */
- if (!wasdel && !isrt) {
- agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
- if (prev_agno != NULLAGNUMBER && prev_agno > agno)
- break;
- prev_agno = agno;
- }
if (got.br_startoff < start) {
del.br_startoff = start;
del.br_blockcount -= start - got.br_startoff;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 5c9a7440d9e4..f3254a4f4cb4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -559,8 +559,17 @@ xfs_dinode_calc_crc(
/*
* Validate di_extsize hint.
*
- * The rules are documented at xfs_ioctl_setattr_check_extsize().
- * These functions must be kept in sync with each other.
+ * 1. Extent size hint is only valid for directories and regular files.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 4. Hint cannot be larger than MAXTEXTLEN.
+ * 5. Can be changed on directories at any time.
+ * 6. Hint value of 0 turns off hints, clears inode flags.
+ * 7. Extent size must be a multiple of the appropriate block size.
+ * For realtime files, this is the rt extent size.
+ * 8. For non-realtime files, the extent size hint must be limited
+ * to half the AG size to avoid alignment extending the extent beyond the
+ * limits of the AG.
*/
xfs_failaddr_t
xfs_inode_validate_extsize(
@@ -580,6 +589,28 @@ xfs_inode_validate_extsize(
inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
extsize_bytes = XFS_FSB_TO_B(mp, extsize);
+ /*
+ * This comment describes a historic gap in this verifier function.
+ *
+ * On older kernels, the extent size hint verifier doesn't check that
+ * the extent size hint is an integer multiple of the realtime extent
+ * size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
+ * The verifier has always enforced the alignment rule for regular
+ * files with the REALTIME flag set.
+ *
+ * If a directory with a misaligned extent size hint is allowed to
+ * propagate that hint into a new regular realtime file, the result
+ * is that the inode cluster buffer verifier will trigger a corruption
+ * shutdown the next time it is run.
+ *
+ * Unfortunately, there could be filesystems with these misconfigured
+ * directories in the wild, so we cannot add a check to this verifier
+ * at this time because that will result a new source of directory
+ * corruption errors when reading an existing filesystem. Instead, we
+ * permit the misconfiguration to pass through the verifiers so that
+ * callers of this function can correct and mitigate externally.
+ */
+
if (rt_flag)
blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
else
@@ -616,8 +647,15 @@ xfs_inode_validate_extsize(
/*
* Validate di_cowextsize hint.
*
- * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
- * These functions must be kept in sync with each other.
+ * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
+ * The inode does not have to have any shared blocks, but it must be a v3.
+ * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
+ * for a directory, the hint is propagated to new files.
+ * 3. Can be changed on files & directories at any time.
+ * 4. Hint value of 0 turns off hints, clears inode flags.
+ * 5. Extent size must be a multiple of the appropriate block size.
+ * 6. The extent size hint must be limited to half the AG size to avoid
+ * alignment extending the extent beyond the limits of the AG.
*/
xfs_failaddr_t
xfs_inode_validate_cowextsize(
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 78324e043e25..8d595a5c4abd 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -143,6 +143,23 @@ xfs_trans_log_inode(
}
/*
+ * Inode verifiers on older kernels don't check that the extent size
+ * hint is an integer multiple of the rt extent size on a directory
+ * with both rtinherit and extszinherit flags set. If we're logging a
+ * directory that is misconfigured in this way, clear the hint.
+ */
+ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+ (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
+ xfs_info_once(ip->i_mount,
+ "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
* Record the specific change for fdatasync optimisation. This allows
* fdatasync to skip log forces for inodes that are only timestamp
* dirty.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0369eb22c1bb..e4c2da4566f1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -690,6 +690,7 @@ xfs_inode_inherit_flags(
const struct xfs_inode *pip)
{
unsigned int di_flags = 0;
+ xfs_failaddr_t failaddr;
umode_t mode = VFS_I(ip)->i_mode;
if (S_ISDIR(mode)) {
@@ -729,6 +730,24 @@ xfs_inode_inherit_flags(
di_flags |= XFS_DIFLAG_FILESTREAM;
ip->i_diflags |= di_flags;
+
+ /*
+ * Inode verifiers on older kernels only check that the extent size
+ * hint is an integer multiple of the rt extent size on realtime files.
+ * They did not check the hint alignment on a directory with both
+ * rtinherit and extszinherit flags set. If the misaligned hint is
+ * propagated from a directory into a new realtime file, new file
+ * allocations will fail due to math errors in the rt allocator and/or
+ * trip the verifiers. Validate the hint settings in the new file so
+ * that we don't let broken hints propagate.
+ */
+ failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+ VFS_I(ip)->i_mode, ip->i_diflags);
+ if (failaddr) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ }
}
/* Propagate di_flags2 from a parent inode to a child inode. */
@@ -737,12 +756,22 @@ xfs_inode_inherit_flags2(
struct xfs_inode *ip,
const struct xfs_inode *pip)
{
+ xfs_failaddr_t failaddr;
+
if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
ip->i_cowextsize = pip->i_cowextsize;
}
if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+
+ /* Don't let invalid cowextsize hints propagate. */
+ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+ VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+ if (failaddr) {
+ ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = 0;
+ }
}
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3925bfcb2365..1fe4c1fc0aea 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1267,20 +1267,8 @@ out_error:
}
/*
- * extent size hint validation is somewhat cumbersome. Rules are:
- *
- * 1. extent size hint is only valid for directories and regular files
- * 2. FS_XFLAG_EXTSIZE is only valid for regular files
- * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
- * 4. can only be changed on regular files if no extents are allocated
- * 5. can be changed on directories at any time
- * 6. extsize hint of 0 turns off hints, clears inode flags.
- * 7. Extent size must be a multiple of the appropriate block size.
- * 8. for non-realtime files, the extent size hint must be limited
- * to half the AG size to avoid alignment extending the extent beyond the
- * limits of the AG.
- *
- * Please keep this function in sync with xfs_scrub_inode_extsize.
+ * Validate a proposed extent size hint. For regular files, the hint can only
+ * be changed if no extents are allocated.
*/
static int
xfs_ioctl_setattr_check_extsize(
@@ -1288,86 +1276,65 @@ xfs_ioctl_setattr_check_extsize(
struct fileattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
- xfs_extlen_t size;
- xfs_fsblock_t extsize_fsb;
+ xfs_failaddr_t failaddr;
+ uint16_t new_diflags;
if (!fa->fsx_valid)
return 0;
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
- ((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
+ XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
return -EINVAL;
- if (fa->fsx_extsize == 0)
- return 0;
-
- extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
- if (extsize_fsb > MAXEXTLEN)
+ if (fa->fsx_extsize & mp->m_blockmask)
return -EINVAL;
- if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
- size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
+ new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
+
+ /*
+ * Inode verifiers on older kernels don't check that the extent size
+ * hint is an integer multiple of the rt extent size on a directory
+ * with both rtinherit and extszinherit flags set. Don't let sysadmins
+ * misconfigure directories.
+ */
+ if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
+ unsigned int rtextsize_bytes;
+
+ rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
+ if (fa->fsx_extsize % rtextsize_bytes)
return -EINVAL;
}
- if (fa->fsx_extsize % size)
- return -EINVAL;
-
- return 0;
+ failaddr = xfs_inode_validate_extsize(ip->i_mount,
+ XFS_B_TO_FSB(mp, fa->fsx_extsize),
+ VFS_I(ip)->i_mode, new_diflags);
+ return failaddr != NULL ? -EINVAL : 0;
}
-/*
- * CoW extent size hint validation rules are:
- *
- * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
- * The inode does not have to have any shared blocks, but it must be a v3.
- * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
- * for a directory, the hint is propagated to new files.
- * 3. Can be changed on files & directories at any time.
- * 4. CoW extsize hint of 0 turns off hints, clears inode flags.
- * 5. Extent size must be a multiple of the appropriate block size.
- * 6. The extent size hint must be limited to half the AG size to avoid
- * alignment extending the extent beyond the limits of the AG.
- *
- * Please keep this function in sync with xfs_scrub_inode_cowextsize.
- */
static int
xfs_ioctl_setattr_check_cowextsize(
struct xfs_inode *ip,
struct fileattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
- xfs_extlen_t size;
- xfs_fsblock_t cowextsize_fsb;
+ xfs_failaddr_t failaddr;
+ uint64_t new_diflags2;
+ uint16_t new_diflags;
if (!fa->fsx_valid)
return 0;
- if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
- return 0;
-
- if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
+ if (fa->fsx_cowextsize & mp->m_blockmask)
return -EINVAL;
- if (fa->fsx_cowextsize == 0)
- return 0;
+ new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
+ new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
- cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
- if (cowextsize_fsb > MAXEXTLEN)
- return -EINVAL;
-
- size = mp->m_sb.sb_blocksize;
- if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
- return -EINVAL;
-
- if (fa->fsx_cowextsize % size)
- return -EINVAL;
-
- return 0;
+ failaddr = xfs_inode_validate_cowextsize(ip->i_mount,
+ XFS_B_TO_FSB(mp, fa->fsx_cowextsize),
+ VFS_I(ip)->i_mode, new_diflags, new_diflags2);
+ return failaddr != NULL ? -EINVAL : 0;
}
static int
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 3c392b1512ac..7ec1a9207517 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -73,6 +73,8 @@ do { \
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
#define xfs_notice_once(dev, fmt, ...) \
xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
+#define xfs_info_once(dev, fmt, ...) \
+ xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__)
void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);