From e6595224464b692ddae193d783402130d1625147 Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Thu, 14 Mar 2024 16:36:56 +0300 Subject: fanotify: remove unneeded sub-zero check for unsigned value Unsigned size_t len in copy_fid_info_to_user is checked for negative value. This check is redundant as it is always false. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5e469c830fdb ("fanotify: copy event fid info to user") Signed-off-by: Nikita Kiryushin Signed-off-by: Jan Kara Message-Id: --- fs/notify/fanotify/fanotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fbdc63cc10d9..4201723357cf 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -502,7 +502,7 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, } /* Pad with 0's */ - WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); + WARN_ON_ONCE(len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; -- cgit v1.2.3 From 4d69c58ef2e419550f02f988ee022fca564f7dd7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 25 Mar 2024 19:36:02 -0600 Subject: fsnotify: Avoid -Wflex-array-member-not-at-end warning Use the `DEFINE_FLEX()` helper for an on-stack definition of a flexible structure where the size of the flexible-array member is known at compile-time, and refactor the rest of the code, accordingly. So, with these changes, fix the following warning: fs/notify/fdinfo.c:45:36: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Link: https://github.com/KSPP/linux/issues/202 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jan Kara Message-Id: --- fs/notify/fdinfo.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 5c430736ec12..dec553034027 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -41,29 +41,25 @@ static void show_fdinfo(struct seq_file *m, struct file *f, #if defined(CONFIG_EXPORTFS) static void show_mark_fhandle(struct seq_file *m, struct inode *inode) { - struct { - struct file_handle handle; - u8 pad[MAX_HANDLE_SZ]; - } f; + DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ); int size, ret, i; - f.handle.handle_bytes = sizeof(f.pad); - size = f.handle.handle_bytes >> 2; + size = f->handle_bytes >> 2; - ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return; } - f.handle.handle_type = ret; - f.handle.handle_bytes = size * sizeof(u32); + f->handle_type = ret; + f->handle_bytes = size * sizeof(u32); seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", - f.handle.handle_bytes, f.handle.handle_type); + f->handle_bytes, f->handle_type); - for (i = 0; i < f.handle.handle_bytes; i++) - seq_printf(m, "%02x", (int)f.handle.f_handle[i]); + for (i = 0; i < f->handle_bytes; i++) + seq_printf(m, "%02x", (int)f->f_handle[i]); } #else static void show_mark_fhandle(struct seq_file *m, struct inode *inode) -- cgit v1.2.3 From d2f277e26f521ccf6fb438463b41dba6123caabe Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:45 +0200 Subject: fsnotify: rename fsnotify_{get,put}_sb_connectors() Instead of counting the number of connectors in an sb, we would like to count the number of watched objects per priority group. As a start, create an accessor fsnotify_sb_watched_objects() to s_fsnotify_connectors and rename the fsnotify_{get,put}_sb_connectors() helpers to fsnotify_{get,put}_sb_watchers() to better describes the counter. Increment the counter at the end of fsnotify_attach_connector_to_object() if connector was attached instead of decrementing it on race to connect. This is fine, because fsnotify_delete_sb() cannot be running in parallel to fsnotify_attach_connector_to_object() which requires a reference to a filesystem object. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-2-amir73il@gmail.com> --- fs/notify/fsnotify.c | 4 +-- fs/notify/mark.c | 67 ++++++++++++++++++++++------------------ include/linux/fsnotify.h | 2 +- include/linux/fsnotify_backend.h | 5 +++ 4 files changed, 45 insertions(+), 33 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2fc105a72a8f..503e7c75e777 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -92,8 +92,8 @@ void fsnotify_sb_delete(struct super_block *sb) fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ - wait_var_event(&sb->s_fsnotify_connectors, - !atomic_long_read(&sb->s_fsnotify_connectors)); + wait_var_event(fsnotify_sb_watched_objects(sb), + !atomic_long_read(fsnotify_sb_watched_objects(sb))); } /* diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d6944ff86ffa..3bcce2671bd5 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -116,10 +116,43 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) return *fsnotify_conn_mask_p(conn); } +static void fsnotify_get_sb_watched_objects(struct super_block *sb) +{ + atomic_long_inc(fsnotify_sb_watched_objects(sb)); +} + +static void fsnotify_put_sb_watched_objects(struct super_block *sb) +{ + if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb))) + wake_up_var(fsnotify_sb_watched_objects(sb)); +} + static void fsnotify_get_inode_ref(struct inode *inode) { ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); + fsnotify_get_sb_watched_objects(inode->i_sb); +} + +static void fsnotify_put_inode_ref(struct inode *inode) +{ + fsnotify_put_sb_watched_objects(inode->i_sb); + iput(inode); +} + +static void fsnotify_get_sb_watchers(struct fsnotify_mark_connector *conn) +{ + struct super_block *sb = fsnotify_connector_sb(conn); + + if (sb) + fsnotify_get_sb_watched_objects(sb); +} + +static void fsnotify_put_sb_watchers(struct fsnotify_mark_connector *conn) +{ + struct super_block *sb = fsnotify_connector_sb(conn); + + if (sb) + fsnotify_put_sb_watched_objects(sb); } /* @@ -213,31 +246,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_put_inode_ref(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - iput(inode); - if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - -static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb) - atomic_long_inc(&sb->s_fsnotify_connectors); -} - -static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) @@ -261,7 +269,7 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } - fsnotify_put_sb_connectors(conn); + fsnotify_put_sb_watchers(conn); rcu_assign_pointer(*(conn->obj), NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; @@ -549,8 +557,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->flags = 0; conn->type = obj_type; conn->obj = connp; - conn->flags = 0; - fsnotify_get_sb_connectors(conn); /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -558,10 +564,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); + return 0; } + fsnotify_get_sb_watchers(conn); return 0; } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1a9de119a0f7..e470bb67c9a3 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -20,7 +20,7 @@ /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { - return atomic_long_read(&sb->s_fsnotify_connectors); + return atomic_long_read(fsnotify_sb_watched_objects(sb)); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8f40c349b228..d4e3bc55d174 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -483,6 +483,11 @@ struct fsnotify_mark_connector { struct hlist_head list; }; +static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) +{ + return &sb->s_fsnotify_connectors; +} + /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events -- cgit v1.2.3 From f115815d75332f9dabb0d7c29c8f67b0f26889c5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:46 +0200 Subject: fsnotify: create helpers to get sb and connp from object In preparation to passing an object pointer to add/remove/find mark helpers, create helpers to get sb and connp by object type. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-3-amir73il@gmail.com> --- fs/notify/fsnotify.h | 15 +++++++++++++++ fs/notify/mark.c | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index fde74eb333cc..6d9951aac449 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -27,6 +27,21 @@ static inline struct super_block *fsnotify_conn_sb( return container_of(conn->obj, struct super_block, s_fsnotify_marks); } +static inline struct super_block *fsnotify_object_sb(void *obj, + enum fsnotify_obj_type obj_type) +{ + switch (obj_type) { + case FSNOTIFY_OBJ_TYPE_INODE: + return ((struct inode *)obj)->i_sb; + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: + return ((struct vfsmount *)obj)->mnt_sb; + case FSNOTIFY_OBJ_TYPE_SB: + return (struct super_block *)obj; + default: + return NULL; + } +} + static inline struct super_block *fsnotify_connector_sb( struct fsnotify_mark_connector *conn) { diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 3bcce2671bd5..d9977af25832 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -97,6 +97,21 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) refcount_inc(&mark->refcnt); } +static fsnotify_connp_t *fsnotify_object_connp(void *obj, + enum fsnotify_obj_type obj_type) +{ + switch (obj_type) { + case FSNOTIFY_OBJ_TYPE_INODE: + return &((struct inode *)obj)->i_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: + return &real_mount(obj)->mnt_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_SB: + return &((struct super_block *)obj)->s_fsnotify_marks; + default: + return NULL; + } +} + static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) -- cgit v1.2.3 From 230d97d39ee2eb9030309f04f98615aaeb420dac Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:47 +0200 Subject: fsnotify: create a wrapper fsnotify_find_inode_mark() In preparation to passing an object pointer to fsnotify_find_mark(), add a wrapper fsnotify_find_inode_mark() and use it where possible. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-4-amir73il@gmail.com> --- fs/nfsd/filecache.c | 4 ++-- fs/notify/dnotify/dnotify.c | 4 ++-- fs/notify/inotify/inotify_user.c | 2 +- include/linux/fsnotify_backend.h | 7 +++++++ kernel/audit_tree.c | 2 +- kernel/audit_watch.c | 2 +- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ddd3e0d9cfa6..ad9083ca144b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -159,8 +159,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) do { fsnotify_group_lock(nfsd_file_fsnotify_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + mark = fsnotify_find_inode_mark(inode, + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 3464fa7e8538..f3669403fabf 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -162,7 +162,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 85d8fdd55329..4ffc30606e0b 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -544,7 +544,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int create = (arg & IN_MASK_CREATE); int ret; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); + fsn_mark = fsnotify_find_inode_mark(inode, group); if (!fsn_mark) return -ENOENT; else if (create) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d4e3bc55d174..992b57a7e95f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -789,6 +789,13 @@ static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, FSNOTIFY_OBJ_TYPE_INODE, add_flags); } +static inline struct fsnotify_mark *fsnotify_find_inode_mark( + struct inode *inode, + struct fsnotify_group *group) +{ + return fsnotify_find_mark(&inode->i_fsnotify_marks, group); +} + /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 1b07e6f12a07..f2f38903b2fe 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -463,7 +463,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) int n; fsnotify_group_lock(audit_tree_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); + mark = fsnotify_find_inode_mark(inode, audit_tree_group); if (!mark) return create_chunk(inode, tree); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 7a98cd176a12..7f358740e958 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -90,7 +90,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode) struct audit_parent *parent = NULL; struct fsnotify_mark *entry; - entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group); + entry = fsnotify_find_inode_mark(inode, audit_watch_group); if (entry) parent = container_of(entry, struct audit_parent, mark); -- cgit v1.2.3 From b5cae086cc2fde629495d988106d70e44c90cb20 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:48 +0200 Subject: fanotify: merge two checks regarding add of ignore mark There are two similar checks for adding an ignore mark without FAN_MARK_IGNORED_SURV_MODIFY, one for the old FAN_MARK_IGNORED flag and one for the new FAN_MARK_IGNORE flag. Merge the two checks into a single location. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-5-amir73il@gmail.com> --- fs/notify/fanotify/fanotify_user.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 4201723357cf..04a0b31f325d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1412,18 +1412,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags, struct fan_fsid *fsid) { - pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); - - /* - * If some other task has this inode open for write we should not add - * an ignore mask, unless that ignore mask is supposed to survive - * modification changes anyway. - */ - if ((flags & FANOTIFY_MARK_IGNORE_BITS) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && - inode_is_open_for_write(inode)) - return 0; - return fanotify_add_mark(group, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } @@ -1913,12 +1901,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; - ret = mnt ? -EINVAL : -EISDIR; - /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ - if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode)) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) - goto path_put_and_out; + /* + * If some other task has this inode open for write we should not add + * an ignore mask, unless that ignore mask is supposed to survive + * modification changes anyway. + */ + if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && + !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { + ret = mnt ? -EINVAL : -EISDIR; + /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ + if (ignore == FAN_MARK_IGNORE && + (mnt || S_ISDIR(inode->i_mode))) + goto path_put_and_out; + + ret = 0; + if (inode && inode_is_open_for_write(inode)) + goto path_put_and_out; + } /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { -- cgit v1.2.3 From 687c217c6aa2c24e6ddf98cc7f86a5b986e5918d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:49 +0200 Subject: fsnotify: pass object pointer and type to fsnotify mark helpers Instead of passing fsnotify_connp_t, pass the pointer to the marked object. Store the object pointer in the connector and move the definition of fsnotify_connp_t to internal fsnotify subsystem API, so it is no longer used by fsnotify backends. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-6-amir73il@gmail.com> --- fs/notify/fanotify/fanotify_user.c | 95 ++++++++------------------------------ fs/notify/fsnotify.h | 23 ++++----- fs/notify/mark.c | 28 ++++++----- include/linux/fsnotify_backend.h | 33 +++++-------- 4 files changed, 59 insertions(+), 120 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 04a0b31f325d..925015ee7fbd 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1076,7 +1076,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, } static int fanotify_remove_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, __u32 mask, + void *obj, unsigned int obj_type, __u32 mask, unsigned int flags, __u32 umask) { struct fsnotify_mark *fsn_mark = NULL; @@ -1084,7 +1084,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, int destroy_mark; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { fsnotify_group_unlock(group); return -ENOENT; @@ -1105,30 +1105,6 @@ static int fanotify_remove_mark(struct fsnotify_group *group, return 0; } -static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - mask, flags, umask); -} - -static int fanotify_remove_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, - flags, umask); -} - -static int fanotify_remove_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, - flags, umask); -} - static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, unsigned int fan_flags) { @@ -1249,7 +1225,7 @@ static int fanotify_set_mark_fsid(struct fsnotify_group *group, } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, + void *obj, unsigned int obj_type, unsigned int fan_flags, struct fan_fsid *fsid) @@ -1288,7 +1264,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; } - ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0); if (ret) goto out_put_mark; @@ -1344,7 +1320,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, } static int fanotify_add_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, __u32 mask, unsigned int fan_flags, struct fan_fsid *fsid) { @@ -1353,9 +1329,9 @@ static int fanotify_add_mark(struct fsnotify_group *group, int ret = 0; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, + fsn_mark = fanotify_add_new_mark(group, obj, obj_type, fan_flags, fsid); if (IS_ERR(fsn_mark)) { fsnotify_group_unlock(group); @@ -1392,30 +1368,6 @@ out: return ret; } -static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); -} - -static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); -} - -static int fanotify_add_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); -} - static struct fsnotify_event *fanotify_alloc_overflow_event(void) { struct fanotify_event *oevent; @@ -1738,6 +1690,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; + void *obj; u32 umask = 0; int ret; @@ -1896,10 +1849,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* inode held in place by reference to path; group by fget on fd */ - if (mark_type == FAN_MARK_INODE) + if (mark_type == FAN_MARK_INODE) { inode = path.dentry->d_inode; - else + obj = inode; + } else { mnt = path.mnt; + if (mark_type == FAN_MARK_MOUNT) + obj = mnt; + else + obj = mnt->mnt_sb; + } /* * If some other task has this inode open for write we should not add @@ -1935,26 +1894,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (mark_cmd) { case FAN_MARK_ADD: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, - flags, fsid); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, - flags, fsid); - else - ret = fanotify_add_inode_mark(group, inode, mask, - flags, fsid); + ret = fanotify_add_mark(group, obj, obj_type, mask, flags, + fsid); break; case FAN_MARK_REMOVE: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, - flags, umask); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, - flags, umask); - else - ret = fanotify_remove_inode_mark(group, inode, mask, - flags, umask); + ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, + umask); break; default: ret = -EINVAL; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 6d9951aac449..b1fec7cd8ee4 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,22 +9,28 @@ #include "../mount.h" +/* + * fsnotify_connp_t is what we embed in objects which connector can be attached + * to. + */ +typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; + static inline struct inode *fsnotify_conn_inode( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct inode, i_fsnotify_marks); + return conn->obj; } static inline struct mount *fsnotify_conn_mount( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct mount, mnt_fsnotify_marks); + return real_mount(conn->obj); } static inline struct super_block *fsnotify_conn_sb( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct super_block, s_fsnotify_marks); + return conn->obj; } static inline struct super_block *fsnotify_object_sb(void *obj, @@ -45,16 +51,7 @@ static inline struct super_block *fsnotify_object_sb(void *obj, static inline struct super_block *fsnotify_connector_sb( struct fsnotify_mark_connector *conn) { - switch (conn->type) { - case FSNOTIFY_OBJ_TYPE_INODE: - return fsnotify_conn_inode(conn)->i_sb; - case FSNOTIFY_OBJ_TYPE_VFSMOUNT: - return fsnotify_conn_mount(conn)->mnt.mnt_sb; - case FSNOTIFY_OBJ_TYPE_SB: - return fsnotify_conn_sb(conn); - default: - return NULL; - } + return fsnotify_object_sb(conn->obj, conn->type); } /* destroy all events sitting in this groups notification queue */ diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d9977af25832..1f611e4eab53 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -265,6 +265,7 @@ static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) { + fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type); struct inode *inode = NULL; *type = conn->type; @@ -285,7 +286,7 @@ static void *fsnotify_detach_connector_from_object( } fsnotify_put_sb_watchers(conn); - rcu_assign_pointer(*(conn->obj), NULL); + rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; @@ -560,7 +561,7 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int obj_type) + void *obj, unsigned int obj_type) { struct fsnotify_mark_connector *conn; @@ -571,7 +572,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, INIT_HLIST_HEAD(&conn->list); conn->flags = 0; conn->type = obj_type; - conn->obj = connp; + conn->obj = obj; /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -620,24 +621,25 @@ out: * to which group and for which inodes. These marks are ordered according to * priority, highest number first, and then by the group's location in memory. */ -static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; + fsnotify_connp_t *connp; int cmp; int err = 0; if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; + connp = fsnotify_object_connp(obj, obj_type); restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, obj_type); + err = fsnotify_attach_connector_to_object(connp, obj, obj_type); if (err) return err; goto restart; @@ -689,7 +691,7 @@ out_err: * event types should be delivered to which group. */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, int add_flags) { struct fsnotify_group *group = mark->group; @@ -710,7 +712,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags); if (ret) goto err; @@ -728,14 +730,14 @@ err: return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { int ret; struct fsnotify_group *group = mark->group; fsnotify_group_lock(group); - ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags); fsnotify_group_unlock(group); return ret; } @@ -745,12 +747,16 @@ EXPORT_SYMBOL_GPL(fsnotify_add_mark); * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, struct fsnotify_group *group) { + fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type); struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; + if (!connp) + return NULL; + conn = fsnotify_grab_connector(connp); if (!conn) return NULL; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 992b57a7e95f..face68fcf850 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -456,13 +456,6 @@ FSNOTIFY_ITER_FUNCS(sb, SB) type < FSNOTIFY_ITER_TYPE_COUNT; \ type++) -/* - * fsnotify_connp_t is what we embed in objects which connector can be attached - * to. fsnotify_connp_t * is how we refer from connector back to object. - */ -struct fsnotify_mark_connector; -typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; - /* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this @@ -476,7 +469,7 @@ struct fsnotify_mark_connector { unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ - fsnotify_connp_t *obj; + void *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; @@ -763,37 +756,35 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, - struct fsnotify_group *group); +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, + struct fsnotify_group *group); /* attach the mark to the object */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags); +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark_locked(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); } static inline struct fsnotify_mark *fsnotify_find_inode_mark( struct inode *inode, struct fsnotify_group *group) { - return fsnotify_find_mark(&inode->i_fsnotify_marks, group); + return fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group); } /* given a group and a mark, flag mark to be freed when all references are dropped */ -- cgit v1.2.3 From c9d4603b054f9a63c07c7012040af4c80adb2c60 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:50 +0200 Subject: fsnotify: create helper fsnotify_update_sb_watchers() We would like to count watched objects by priority group, so we will need to update the watched object counter after adding/removing marks. Create a helper fsnotify_update_sb_watchers() and call it after attaching/detaching a mark, instead of fsnotify_{get,put}_sb_watchers() only after attaching/detaching a connector. Soon, we will use this helper to count watched objects by the highest watching priority group. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-7-amir73il@gmail.com> --- fs/notify/mark.c | 36 ++++++++++++++++++++++-------------- include/linux/fsnotify_backend.h | 1 + 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 1f611e4eab53..647c49c20467 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -154,20 +154,23 @@ static void fsnotify_put_inode_ref(struct inode *inode) iput(inode); } -static void fsnotify_get_sb_watchers(struct fsnotify_mark_connector *conn) +/* + * Grab or drop watched objects reference depending on whether the connector + * is attached and has any marks attached. + */ +static void fsnotify_update_sb_watchers(struct super_block *sb, + struct fsnotify_mark_connector *conn) { - struct super_block *sb = fsnotify_connector_sb(conn); + bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED; + bool has_marks = conn->obj && !hlist_empty(&conn->list); - if (sb) + if (has_marks && !is_watched) { + conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED; fsnotify_get_sb_watched_objects(sb); -} - -static void fsnotify_put_sb_watchers(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb) + } else if (!has_marks && is_watched) { + conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED; fsnotify_put_sb_watched_objects(sb); + } } /* @@ -266,6 +269,7 @@ static void *fsnotify_detach_connector_from_object( unsigned int *type) { fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type); + struct super_block *sb = fsnotify_connector_sb(conn); struct inode *inode = NULL; *type = conn->type; @@ -285,10 +289,10 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } - fsnotify_put_sb_watchers(conn); rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -340,6 +344,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { + struct super_block *sb = fsnotify_connector_sb(conn); + + /* Update watched objects after detaching mark */ + if (sb) + fsnotify_update_sb_watchers(sb, conn); objp = __fsnotify_recalc_mask(conn); type = conn->type; } @@ -581,10 +590,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ kmem_cache_free(fsnotify_mark_connector_cachep, conn); - return 0; } - - fsnotify_get_sb_watchers(conn); return 0; } @@ -624,6 +630,7 @@ out: static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { + struct super_block *sb = fsnotify_object_sb(obj, obj_type); struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; fsnotify_connp_t *connp; @@ -673,6 +680,7 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index face68fcf850..83004d9e07a3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -465,6 +465,7 @@ FSNOTIFY_ITER_FUNCS(sb, SB) struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ +#define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ union { -- cgit v1.2.3 From 07a3b8d0bf726a1e49b050bbc6bd72f031e505fe Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:51 +0200 Subject: fsnotify: lazy attach fsnotify_sb_info state to sb Define a container struct fsnotify_sb_info to hold per-sb state, including the reference to sb marks connector. Allocate the fsnotify_sb_info state before attaching connector to any object on the sb and free it only when killing sb. This state is going to be used for storing per priority watched objects counters. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-8-amir73il@gmail.com> --- fs/notify/fsnotify.c | 16 +++++++++++++--- fs/notify/fsnotify.h | 9 ++++++++- fs/notify/mark.c | 32 +++++++++++++++++++++++++++++++- include/linux/fs.h | 8 ++++---- include/linux/fsnotify_backend.h | 17 +++++++++++++++++ 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 503e7c75e777..fb3f36bc6ea9 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb) void fsnotify_sb_delete(struct super_block *sb) { + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return; + fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), !atomic_long_read(fsnotify_sb_watched_objects(sb))); + kfree(sbinfo); } /* @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!sb->s_fsnotify_marks && + if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!inode2 || !inode2->i_fsnotify_marks)) @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = - fsnotify_first_mark(&sb->s_fsnotify_marks); + if (sbinfo) { + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = + fsnotify_first_mark(&sbinfo->sb_marks); + } if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index b1fec7cd8ee4..2d059f789ee3 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -54,6 +54,13 @@ static inline struct super_block *fsnotify_connector_sb( return fsnotify_object_sb(conn->obj, conn->type); } +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + return sbinfo ? &sbinfo->sb_marks : NULL; +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -79,7 +86,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) /* run the list of all marks associated with sb and destroy them */ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) { - fsnotify_destroy_marks(&sb->s_fsnotify_marks); + fsnotify_destroy_marks(fsnotify_sb_marks(sb)); } /* diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 647c49c20467..b2f5d8c9cce1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -106,7 +106,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, case FSNOTIFY_OBJ_TYPE_VFSMOUNT: return &real_mount(obj)->mnt_fsnotify_marks; case FSNOTIFY_OBJ_TYPE_SB: - return &((struct super_block *)obj)->s_fsnotify_marks; + return fsnotify_sb_marks(obj); default: return NULL; } @@ -569,6 +569,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +static int fsnotify_attach_info_to_sb(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo; + + /* sb info is freed on fsnotify_sb_delete() */ + sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); + if (!sbinfo) + return -ENOMEM; + + /* + * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() + * will observe an initialized structure + */ + if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { + /* Someone else created sbinfo for us */ + kfree(sbinfo); + } + return 0; +} + static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, void *obj, unsigned int obj_type) { @@ -640,6 +660,16 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; + /* + * Attach the sb info before attaching a connector to any object on sb. + * The sb info will remain attached as long as sb lives. + */ + if (!fsnotify_sb_info(sb)) { + err = fsnotify_attach_info_to_sb(sb); + if (err) + return err; + } + connp = fsnotify_object_connp(obj, obj_type); restart: spin_lock(&mark->lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fc429b0af0..7f40b592f711 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -73,6 +73,8 @@ struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; +struct fsnotify_mark_connector; +struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; struct fileattr; @@ -620,8 +622,6 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 -struct fsnotify_mark_connector; - /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -1249,7 +1249,7 @@ struct super_block { /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and - * s_fsnotify_marks together for cache efficiency. They are frequently + * s_fsnotify_info together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ @@ -1261,7 +1261,7 @@ struct super_block { time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; + struct fsnotify_sb_info *s_fsnotify_info; #endif /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 83004d9e07a3..c9f2b2f6b493 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -477,6 +477,23 @@ struct fsnotify_mark_connector { struct hlist_head list; }; +/* + * Container for per-sb fsnotify state (sb marks and more). + * Attached lazily on first marked object on the sb and freed when killing sb. + */ +struct fsnotify_sb_info { + struct fsnotify_mark_connector __rcu *sb_marks; +}; + +static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) +{ +#ifdef CONFIG_FSNOTIFY + return READ_ONCE(sb->s_fsnotify_info); +#else + return NULL; +#endif +} + static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) { return &sb->s_fsnotify_connectors; -- cgit v1.2.3 From cb5d4f48c10445c97a22af0bd8b9cf0ed6cc8036 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:52 +0200 Subject: fsnotify: move s_fsnotify_connectors into fsnotify_sb_info Move the s_fsnotify_connectors counter into the per-sb fsnotify state. Suggested-by: Christian Brauner Signed-off-by: Amir Goldstein Reviewed-by: Christian Brauner Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-9-amir73il@gmail.com> --- include/linux/fs.h | 6 ------ include/linux/fsnotify.h | 8 +++++++- include/linux/fsnotify_backend.h | 7 ++++++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f40b592f711..c36c2f8fdbe3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1302,12 +1302,6 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; - /* - * Number of inode/mount/sb objects that are being watched, note that - * inodes objects are currently double-accounted. - */ - atomic_long_t s_fsnotify_connectors; - /* Read-only state of the superblock is being changed */ int s_readonly_remount; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index e470bb67c9a3..48dc65702415 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -20,7 +20,13 @@ /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { - return atomic_long_read(fsnotify_sb_watched_objects(sb)); + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return false; + + return atomic_long_read(&sbinfo->watched_objects); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c9f2b2f6b493..ec592aeadfa3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -483,6 +483,11 @@ struct fsnotify_mark_connector { */ struct fsnotify_sb_info { struct fsnotify_mark_connector __rcu *sb_marks; + /* + * Number of inode/mount/sb objects that are being watched in this sb. + * Note that inodes objects are currently double-accounted. + */ + atomic_long_t watched_objects; }; static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) @@ -496,7 +501,7 @@ static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) { - return &sb->s_fsnotify_connectors; + return &fsnotify_sb_info(sb)->watched_objects; } /* -- cgit v1.2.3 From 477cf917dd02853ba78a73cdeb6548889e5f8cd7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:53 +0200 Subject: fsnotify: use an enum for group priority constants And use meaningfull names for the constants. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-10-amir73il@gmail.com> --- fs/notify/fanotify/fanotify_user.c | 11 +++++------ include/linux/fsnotify_backend.h | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 925015ee7fbd..483a6a1255fb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1516,13 +1516,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) INIT_LIST_HEAD(&group->fanotify_data.access_list); switch (class) { case FAN_CLASS_NOTIF: - group->priority = FS_PRIO_0; + group->priority = FSNOTIFY_PRIO_NORMAL; break; case FAN_CLASS_CONTENT: - group->priority = FS_PRIO_1; + group->priority = FSNOTIFY_PRIO_CONTENT; break; case FAN_CLASS_PRE_CONTENT: - group->priority = FS_PRIO_2; + group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: fd = -EINVAL; @@ -1774,12 +1774,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; /* - * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not - * allowed to set permissions events. + * Permission events require minimum priority FAN_CLASS_CONTENT. */ ret = -EINVAL; if (mask & FANOTIFY_PERM_EVENTS && - group->priority == FS_PRIO_0) + group->priority < FSNOTIFY_PRIO_CONTENT) goto fput_and_out; if (mask & FAN_FS_ERROR && diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ec592aeadfa3..fc38587d8564 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -176,6 +176,17 @@ struct fsnotify_event { struct list_head list; }; +/* + * fsnotify group priorities. + * Events are sent in order from highest priority to lowest priority. + */ +enum fsnotify_group_prio { + FSNOTIFY_PRIO_NORMAL = 0, /* normal notifiers, no permissions */ + FSNOTIFY_PRIO_CONTENT, /* fanotify permission events */ + FSNOTIFY_PRIO_PRE_CONTENT, /* fanotify pre-content events */ + __FSNOTIFY_PRIO_NUM +}; + /* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. @@ -201,14 +212,7 @@ struct fsnotify_group { wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ - /* - * Valid fsnotify group priorities. Events are send in order from highest - * priority to lowest priority. We default to the lowest priority. - */ - #define FS_PRIO_0 0 /* normal notifiers, no permissions */ - #define FS_PRIO_1 1 /* fanotify content based access control */ - #define FS_PRIO_2 2 /* fanotify pre-content access */ - unsigned int priority; + enum fsnotify_group_prio priority; /* priority for sending events */ bool shutdown; /* group is being shut down, don't queue more events */ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ -- cgit v1.2.3 From a5e57b4d370c6d320e5bfb0c919fe00aee29e039 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Mar 2024 20:41:54 +0200 Subject: fsnotify: optimize the case of no permission event watchers Commit e43de7f0862b ("fsnotify: optimize the case of no marks of any type") optimized the case where there are no fsnotify watchers on any of the filesystem's objects. It is quite common for a system to have a single local filesystem and it is quite common for the system to have some inotify watches on some config files or directories, so the optimization of no marks at all is often not in effect. Permission event watchers, which require high priority group are more rare, so optimizing the case of no marks og high priority groups can improve performance for more systems, especially for performance sensitive io workloads. Count per-sb watched objects by high priority groups and use that the optimize out the call to __fsnotify_parent() and fsnotify() in fsnotify permission hooks. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240317184154.1200192-11-amir73il@gmail.com> --- fs/notify/fsnotify.c | 3 +++ fs/notify/mark.c | 30 +++++++++++++++++++++++++++--- include/linux/fsnotify.h | 19 ++++++++++++++++--- include/linux/fsnotify_backend.h | 11 ++++++++--- 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index fb3f36bc6ea9..2ae965ef37e8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -100,6 +100,9 @@ void fsnotify_sb_delete(struct super_block *sb) /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), !atomic_long_read(fsnotify_sb_watched_objects(sb))); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT)); kfree(sbinfo); } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index b2f5d8c9cce1..c3eefa70633c 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -161,13 +161,36 @@ static void fsnotify_put_inode_ref(struct inode *inode) static void fsnotify_update_sb_watchers(struct super_block *sb, struct fsnotify_mark_connector *conn) { + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED; - bool has_marks = conn->obj && !hlist_empty(&conn->list); + struct fsnotify_mark *first_mark = NULL; + unsigned int highest_prio = 0; - if (has_marks && !is_watched) { + if (conn->obj) + first_mark = hlist_entry_safe(conn->list.first, + struct fsnotify_mark, obj_list); + if (first_mark) + highest_prio = first_mark->group->priority; + if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM)) + highest_prio = 0; + + /* + * If the highest priority of group watching this object is prio, + * then watched object has a reference on counters [0..prio]. + * Update priority >= 1 watched objects counters. + */ + for (unsigned int p = conn->prio + 1; p <= highest_prio; p++) + atomic_long_inc(&sbinfo->watched_objects[p]); + for (unsigned int p = conn->prio; p > highest_prio; p--) + atomic_long_dec(&sbinfo->watched_objects[p]); + conn->prio = highest_prio; + + /* Update priority >= 0 (a.k.a total) watched objects counter */ + BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0); + if (first_mark && !is_watched) { conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED; fsnotify_get_sb_watched_objects(sb); - } else if (!has_marks && is_watched) { + } else if (!first_mark && is_watched) { conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED; fsnotify_put_sb_watched_objects(sb); } @@ -600,6 +623,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); conn->flags = 0; + conn->prio = 0; conn->type = obj_type; conn->obj = obj; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 48dc65702415..4da80e92f804 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,8 +17,9 @@ #include #include -/* Are there any inode/mount/sb objects that are being watched at all? */ -static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +/* Are there any inode/mount/sb objects watched with priority prio or above? */ +static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, + int prio) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); @@ -26,7 +27,13 @@ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) if (!sbinfo) return false; - return atomic_long_read(&sbinfo->watched_objects); + return atomic_long_read(&sbinfo->watched_objects[prio]); +} + +/* Are there any inode/mount/sb objects that are being watched at all? */ +static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +{ + return fsnotify_sb_has_priority_watchers(sb, 0); } /* @@ -109,6 +116,12 @@ static inline int fsnotify_file(struct file *file, __u32 mask) return 0; path = &file->f_path; + /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ + if (mask & ALL_FSNOTIFY_PERM_EVENTS && + !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, + FSNOTIFY_PRIO_CONTENT)) + return 0; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index fc38587d8564..7f1ab8264e41 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -468,7 +468,8 @@ FSNOTIFY_ITER_FUNCS(sb, SB) */ struct fsnotify_mark_connector { spinlock_t lock; - unsigned short type; /* Type of object [lock] */ + unsigned char type; /* Type of object [lock] */ + unsigned char prio; /* Highest priority group */ #define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ @@ -490,8 +491,12 @@ struct fsnotify_sb_info { /* * Number of inode/mount/sb objects that are being watched in this sb. * Note that inodes objects are currently double-accounted. + * + * The value in watched_objects[prio] is the number of objects that are + * watched by groups of priority >= prio, so watched_objects[0] is the + * total number of watched objects in this sb. */ - atomic_long_t watched_objects; + atomic_long_t watched_objects[__FSNOTIFY_PRIO_NUM]; }; static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) @@ -505,7 +510,7 @@ static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) { - return &fsnotify_sb_info(sb)->watched_objects; + return &fsnotify_sb_info(sb)->watched_objects[0]; } /* -- cgit v1.2.3 From 795bb82d12a16a4cee42845b0e4c7e3276574e5d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 16 Apr 2024 21:14:52 +0300 Subject: fsnotify: fix UAF from FS_ERROR event on a shutting down filesystem Protect against use after free when filesystem calls fsnotify_sb_error() during fs shutdown. Move freeing of sb->s_fsnotify_info to destroy_super_work(), because it may be accessed from fs shutdown context. Reported-by: syzbot+5e3f9b2a67b45f16d4e6@syzkaller.appspotmail.com Suggested-by: Jan Kara Link: https://lore.kernel.org/linux-fsdevel/20240416173211.4lnmgctyo4jn5fha@quack3/ Fixes: 07a3b8d0bf72 ("fsnotify: lazy attach fsnotify_sb_info state to sb") Reviewed-by: Christian Brauner Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240416181452.567070-1-amir73il@gmail.com> --- fs/notify/fsnotify.c | 6 +++++- fs/super.c | 1 + include/linux/fsnotify_backend.h | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2ae965ef37e8..ff69ae24c4e8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -103,7 +103,11 @@ void fsnotify_sb_delete(struct super_block *sb) WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT)); - kfree(sbinfo); +} + +void fsnotify_sb_free(struct super_block *sb) +{ + kfree(sb->s_fsnotify_info); } /* diff --git a/fs/super.c b/fs/super.c index 71d9779c42b1..81656c7807db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -274,6 +274,7 @@ static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, destroy_work); + fsnotify_sb_free(s); security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f1ab8264e41..4dd6143db271 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -576,6 +576,7 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) @@ -880,6 +881,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void fsnotify_sb_free(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} -- cgit v1.2.3