From 9b6304c1d53745c300b86f202d0dcff395e2d2db Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 14:58:10 -0400 Subject: fs: add ctime accessors infrastructure struct timespec64 has unused bits in the tv_nsec field that can be used for other purposes. In future patches, we're going to change how the inode->i_ctime is accessed in certain inodes in order to make use of them. In order to do that safely though, we'll need to eradicate raw accesses of the inode->i_ctime field from the kernel. Add new accessor functions for the ctime that we use to replace them. Reviewed-by: Jan Kara Reviewed-by: Luis Chamberlain Signed-off-by: Jeff Layton Reviewed-by: Damien Le Moal Message-Id: <20230705185812.579118-2-jlayton@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 8fefb69e1f84..aac5cdcf5e89 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2501,6 +2501,22 @@ struct timespec64 current_time(struct inode *inode) } EXPORT_SYMBOL(current_time); +/** + * inode_set_ctime_current - set the ctime to current_time + * @inode: inode + * + * Set the inode->i_ctime to the current value for the inode. Returns + * the current value that was assigned to i_ctime. + */ +struct timespec64 inode_set_ctime_current(struct inode *inode) +{ + struct timespec64 now = current_time(inode); + + inode_set_ctime(inode, now.tv_sec, now.tv_nsec); + return now; +} +EXPORT_SYMBOL(inode_set_ctime_current); + /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from -- cgit v1.2.3 From 2276e5ba8567f683c49a36ba885d0fe6abe2b45e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 15:00:50 -0400 Subject: fs: convert to ctime accessor functions In later patches, we're going to change how the inode's ctime field is used. Switch to using accessor functions instead of raw accesses of inode->i_ctime. Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Message-Id: <20230705190309.579783-23-jlayton@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index aac5cdcf5e89..d4ab92233062 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1853,6 +1853,7 @@ EXPORT_SYMBOL(bmap); static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { + struct timespec64 ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; @@ -1864,7 +1865,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, /* * Is ctime younger than or equal to atime? If yes, update atime: */ - if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0) + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &inode->i_atime) >= 0) return 1; /* @@ -1887,7 +1889,7 @@ int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) if (flags & S_ATIME) inode->i_atime = *time; if (flags & S_CTIME) - inode->i_ctime = *time; + inode_set_ctime_to_ts(inode, *time); if (flags & S_MTIME) inode->i_mtime = *time; @@ -2073,6 +2075,7 @@ EXPORT_SYMBOL(file_remove_privs); static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) { int sync_it = 0; + struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) @@ -2081,7 +2084,8 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) if (!timespec64_equal(&inode->i_mtime, now)) sync_it = S_MTIME; - if (!timespec64_equal(&inode->i_ctime, now)) + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) -- cgit v1.2.3 From b3030e4f23441347cd5d18b2610fb299ed98d739 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:32 -0400 Subject: fs: remove silly warning from current_time An inode with no superblock? Unpossible! Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-1-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index d4ab92233062..3fc251bfaf73 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2495,12 +2495,6 @@ struct timespec64 current_time(struct inode *inode) struct timespec64 now; ktime_get_coarse_real_ts64(&now); - - if (unlikely(!inode->i_sb)) { - WARN(1, "current_time() called with uninitialized super_block in the inode"); - return now; - } - return timestamp_truncate(now, inode); } EXPORT_SYMBOL(current_time); -- cgit v1.2.3 From 541d4c798a598854fcce7326d947cbcbd35701d6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:34 -0400 Subject: fs: drop the timespec64 arg from generic_update_time In future patches we're going to change how the ctime is updated to keep track of when it has been queried. The way that the update_time operation works (and a lot of its callers) make this difficult, since they grab a timestamp early and then pass it down to eventually be copied into the inode. All of the existing update_time callers pass in the result of current_time() in some fashion. Drop the "time" parameter from generic_update_time, and rework it to fetch its own timestamp. This change means that an update_time could fetch a different timestamp than was seen in inode_needs_update_time. update_time is only ever called with one of two flag combinations: Either S_ATIME is set, or S_MTIME|S_CTIME|S_VERSION are set. With this change we now treat the flags argument as an indicator that some value needed to be updated when last checked, rather than an indication to update specific timestamps. Rework the logic for updating the timestamps and put it in a new inode_update_timestamps helper that other update_time routines can use. S_ATIME is as treated as we always have, but if any of the other three are set, then we attempt to update all three. Also, some callers of generic_update_time need to know what timestamps were actually updated. Change it to return an S_* flag mask to indicate that and rework the callers to expect it. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-3-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 18 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 3fc251bfaf73..e07e45f6cd01 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1881,29 +1881,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } -int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) +/** + * inode_update_timestamps - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. This function handles updating the + * actual timestamps. It's up to the caller to ensure that the inode is marked + * dirty appropriately. + * + * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, + * attempt to update all three of them. S_ATIME updates can be handled + * independently of the rest. + * + * Returns a set of S_* flags indicating which values changed. + */ +int inode_update_timestamps(struct inode *inode, int flags) { - int dirty_flags = 0; + int updated = 0; + struct timespec64 now; + + if (flags & (S_MTIME|S_CTIME|S_VERSION)) { + struct timespec64 ctime = inode_get_ctime(inode); - if (flags & (S_ATIME | S_CTIME | S_MTIME)) { - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_CTIME) - inode_set_ctime_to_ts(inode, *time); - if (flags & S_MTIME) - inode->i_mtime = *time; - - if (inode->i_sb->s_flags & SB_LAZYTIME) - dirty_flags |= I_DIRTY_TIME; - else - dirty_flags |= I_DIRTY_SYNC; + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + if (!timespec64_equal(&now, &inode->i_mtime)) { + inode->i_mtime = now; + updated |= S_MTIME; + } + if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) + updated |= S_VERSION; + } else { + now = current_time(inode); } - if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) - dirty_flags |= I_DIRTY_SYNC; + if (flags & S_ATIME) { + if (!timespec64_equal(&now, &inode->i_atime)) { + inode->i_atime = now; + updated |= S_ATIME; + } + } + return updated; +} +EXPORT_SYMBOL(inode_update_timestamps); + +/** + * generic_update_time - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, + * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME + * updates can be handled done independently of the rest. + * + * Returns a S_* mask indicating which fields were updated. + */ +int generic_update_time(struct inode *inode, int flags) +{ + int updated = inode_update_timestamps(inode, flags); + int dirty_flags = 0; + if (updated & (S_ATIME|S_MTIME|S_CTIME)) + dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; + if (updated & S_VERSION) + dirty_flags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, dirty_flags); - return 0; + return updated; } EXPORT_SYMBOL(generic_update_time); @@ -1915,7 +1962,8 @@ int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) { if (inode->i_op->update_time) return inode->i_op->update_time(inode, time, flags); - return generic_update_time(inode, time, flags); + generic_update_time(inode, flags); + return 0; } EXPORT_SYMBOL(inode_update_time); -- cgit v1.2.3 From 913e99287b98fd051ac1976140a2764a8ef9dfbf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:39 -0400 Subject: fs: drop the timespec64 argument from update_time Now that all of the update_time operations are prepared for it, we can drop the timespec64 argument from the update_time operation. Do that and remove it from some associated functions like inode_update_time and inode_needs_update_time. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-8-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index e07e45f6cd01..e50d94a136fe 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1958,10 +1958,10 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) +int inode_update_time(struct inode *inode, int flags) { if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); + return inode->i_op->update_time(inode, flags); generic_update_time(inode, flags); return 0; } @@ -2015,7 +2015,6 @@ void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); - struct timespec64 now; if (!atime_needs_update(path, inode)) return; @@ -2034,8 +2033,7 @@ void touch_atime(const struct path *path) * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ - now = current_time(inode); - inode_update_time(inode, &now, S_ATIME); + inode_update_time(inode, S_ATIME); __mnt_drop_write(mnt); skip_update: sb_end_write(inode->i_sb); @@ -2120,20 +2118,21 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) +static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; + struct timespec64 now = current_time(inode); struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; - if (!timespec64_equal(&inode->i_mtime, now)) + if (!timespec64_equal(&inode->i_mtime, &now)) sync_it = S_MTIME; ctime = inode_get_ctime(inode); - if (!timespec64_equal(&ctime, now)) + if (!timespec64_equal(&ctime, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) @@ -2142,15 +2141,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) return sync_it; } -static int __file_update_time(struct file *file, struct timespec64 *now, - int sync_mode) +static int __file_update_time(struct file *file, int sync_mode) { int ret = 0; struct inode *inode = file_inode(file); /* try to update time settings */ if (!__mnt_want_write_file(file)) { - ret = inode_update_time(inode, now, sync_mode); + ret = inode_update_time(inode, sync_mode); __mnt_drop_write_file(file); } @@ -2175,13 +2173,12 @@ int file_update_time(struct file *file) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } EXPORT_SYMBOL(file_update_time); @@ -2204,7 +2201,6 @@ static int file_modified_flags(struct file *file, int flags) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); /* * Clear the security bits if the process is not being run by root. @@ -2217,13 +2213,13 @@ static int file_modified_flags(struct file *file, int flags) if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; if (flags & IOCB_NOWAIT) return -EAGAIN; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } /** -- cgit v1.2.3 From ffb6cf19e06334062744b7e3493f71e500964f8e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:40 -0400 Subject: fs: add infrastructure for multigrain timestamps The VFS always uses coarse-grained timestamps when updating the ctime and mtime after a change. This has the benefit of allowing filesystems to optimize away a lot metadata updates, down to around 1 per jiffy, even when a file is under heavy writes. Unfortunately, this has always been an issue when we're exporting via NFSv3, which relies on timestamps to validate caches. A lot of changes can happen in a jiffy, so timestamps aren't sufficient to help the client decide to invalidate the cache. Even with NFSv4, a lot of exported filesystems don't properly support a change attribute and are subject to the same problems with timestamp granularity. Other applications have similar issues with timestamps (e.g backup applications). If we were to always use fine-grained timestamps, that would improve the situation, but that becomes rather expensive, as the underlying filesystem would have to log a lot more metadata updates. What we need is a way to only use fine-grained timestamps when they are being actively queried. POSIX generally mandates that when the the mtime changes, the ctime must also change. The kernel always stores normalized ctime values, so only the first 30 bits of the tv_nsec field are ever used. Use the 31st bit of the ctime tv_nsec field to indicate that something has queried the inode for the mtime or ctime. When this flag is set, on the next mtime or ctime update, the kernel will fetch a fine-grained timestamp instead of the usual coarse-grained one. Filesytems can opt into this behavior by setting the FS_MGTIME flag in the fstype. Filesystems that don't set this flag will continue to use coarse-grained timestamps. Later patches will convert individual filesystems to use the new infrastructure. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-9-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- fs/inode.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 3 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index e50d94a136fe..f55957ac80e6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2118,10 +2118,52 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); +/** + * current_mgtime - Return FS time (possibly fine-grained) + * @inode: inode. + * + * Return the current time truncated to the time granularity supported by + * the fs, as suitable for a ctime/mtime change. If the ctime is flagged + * as having been QUERIED, get a fine-grained timestamp. + */ +struct timespec64 current_mgtime(struct inode *inode) +{ + struct timespec64 now, ctime; + atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; + long nsec = atomic_long_read(pnsec); + + if (nsec & I_CTIME_QUERIED) { + ktime_get_real_ts64(&now); + return timestamp_truncate(now, inode); + } + + ktime_get_coarse_real_ts64(&now); + now = timestamp_truncate(now, inode); + + /* + * If we've recently fetched a fine-grained timestamp + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &now) > 0) + now = ctime; + + return now; +} +EXPORT_SYMBOL(current_mgtime); + +static struct timespec64 current_ctime(struct inode *inode) +{ + if (is_mgtime(inode)) + return current_mgtime(inode); + return current_time(inode); +} + static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; - struct timespec64 now = current_time(inode); + struct timespec64 now = current_ctime(inode); struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ @@ -2552,9 +2594,43 @@ EXPORT_SYMBOL(current_time); */ struct timespec64 inode_set_ctime_current(struct inode *inode) { - struct timespec64 now = current_time(inode); + struct timespec64 now; + struct timespec64 ctime; + + ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); + if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { + now = current_time(inode); - inode_set_ctime(inode, now.tv_sec, now.tv_nsec); + /* Just copy it into place if it's not multigrain */ + if (!is_mgtime(inode)) { + inode_set_ctime_to_ts(inode, now); + return now; + } + + /* + * If we've recently updated with a fine-grained timestamp, + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime.tv_sec = inode->__i_ctime.tv_sec; + if (timespec64_compare(&ctime, &now) > 0) + return ctime; + + /* + * Ctime updates are usually protected by the inode_lock, but + * we can still race with someone setting the QUERIED flag. + * Try to swap the new nsec value into place. If it's changed + * in the interim, then just go with a fine-grained timestamp. + */ + if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, + now.tv_nsec) != ctime.tv_nsec) + goto fine_grained; + inode->__i_ctime.tv_sec = now.tv_sec; + return now; + } +fine_grained: + ktime_get_real_ts64(&now); + inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); return now; } EXPORT_SYMBOL(inode_set_ctime_current); -- cgit v1.2.3