diff options
-rw-r--r-- | fs/nfsd/filecache.c | 4 | ||||
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 4 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 143 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 20 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 27 | ||||
-rw-r--r-- | fs/notify/fsnotify.h | 39 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 2 | ||||
-rw-r--r-- | fs/notify/mark.c | 174 | ||||
-rw-r--r-- | fs/super.c | 1 | ||||
-rw-r--r-- | include/linux/fs.h | 14 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 21 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 97 | ||||
-rw-r--r-- | kernel/audit_tree.c | 2 | ||||
-rw-r--r-- | kernel/audit_watch.c | 2 |
14 files changed, 334 insertions, 216 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ddd3e0d9cfa6..ad9083ca144b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -159,8 +159,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) do { fsnotify_group_lock(nfsd_file_fsnotify_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + mark = fsnotify_find_inode_mark(inode, + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 3464fa7e8538..f3669403fabf 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -162,7 +162,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fbdc63cc10d9..483a6a1255fb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -502,7 +502,7 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, } /* Pad with 0's */ - WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); + WARN_ON_ONCE(len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; @@ -1076,7 +1076,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, } static int fanotify_remove_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, __u32 mask, + void *obj, unsigned int obj_type, __u32 mask, unsigned int flags, __u32 umask) { struct fsnotify_mark *fsn_mark = NULL; @@ -1084,7 +1084,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, int destroy_mark; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { fsnotify_group_unlock(group); return -ENOENT; @@ -1105,30 +1105,6 @@ static int fanotify_remove_mark(struct fsnotify_group *group, return 0; } -static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - mask, flags, umask); -} - -static int fanotify_remove_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, - flags, umask); -} - -static int fanotify_remove_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, - flags, umask); -} - static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, unsigned int fan_flags) { @@ -1249,7 +1225,7 @@ static int fanotify_set_mark_fsid(struct fsnotify_group *group, } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, + void *obj, unsigned int obj_type, unsigned int fan_flags, struct fan_fsid *fsid) @@ -1288,7 +1264,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; } - ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0); if (ret) goto out_put_mark; @@ -1344,7 +1320,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, } static int fanotify_add_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, __u32 mask, unsigned int fan_flags, struct fan_fsid *fsid) { @@ -1353,9 +1329,9 @@ static int fanotify_add_mark(struct fsnotify_group *group, int ret = 0; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, + fsn_mark = fanotify_add_new_mark(group, obj, obj_type, fan_flags, fsid); if (IS_ERR(fsn_mark)) { fsnotify_group_unlock(group); @@ -1392,42 +1368,6 @@ out: return ret; } -static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); -} - -static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); -} - -static int fanotify_add_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); - - /* - * If some other task has this inode open for write we should not add - * an ignore mask, unless that ignore mask is supposed to survive - * modification changes anyway. - */ - if ((flags & FANOTIFY_MARK_IGNORE_BITS) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && - inode_is_open_for_write(inode)) - return 0; - - return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); -} - static struct fsnotify_event *fanotify_alloc_overflow_event(void) { struct fanotify_event *oevent; @@ -1576,13 +1516,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) INIT_LIST_HEAD(&group->fanotify_data.access_list); switch (class) { case FAN_CLASS_NOTIF: - group->priority = FS_PRIO_0; + group->priority = FSNOTIFY_PRIO_NORMAL; break; case FAN_CLASS_CONTENT: - group->priority = FS_PRIO_1; + group->priority = FSNOTIFY_PRIO_CONTENT; break; case FAN_CLASS_PRE_CONTENT: - group->priority = FS_PRIO_2; + group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: fd = -EINVAL; @@ -1750,6 +1690,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; + void *obj; u32 umask = 0; int ret; @@ -1833,12 +1774,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; /* - * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not - * allowed to set permissions events. + * Permission events require minimum priority FAN_CLASS_CONTENT. */ ret = -EINVAL; if (mask & FANOTIFY_PERM_EVENTS && - group->priority == FS_PRIO_0) + group->priority < FSNOTIFY_PRIO_CONTENT) goto fput_and_out; if (mask & FAN_FS_ERROR && @@ -1908,17 +1848,34 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* inode held in place by reference to path; group by fget on fd */ - if (mark_type == FAN_MARK_INODE) + if (mark_type == FAN_MARK_INODE) { inode = path.dentry->d_inode; - else + obj = inode; + } else { mnt = path.mnt; + if (mark_type == FAN_MARK_MOUNT) + obj = mnt; + else + obj = mnt->mnt_sb; + } - ret = mnt ? -EINVAL : -EISDIR; - /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ - if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode)) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) - goto path_put_and_out; + /* + * If some other task has this inode open for write we should not add + * an ignore mask, unless that ignore mask is supposed to survive + * modification changes anyway. + */ + if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && + !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { + ret = mnt ? -EINVAL : -EISDIR; + /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ + if (ignore == FAN_MARK_IGNORE && + (mnt || S_ISDIR(inode->i_mode))) + goto path_put_and_out; + + ret = 0; + if (inode && inode_is_open_for_write(inode)) + goto path_put_and_out; + } /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { @@ -1936,26 +1893,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (mark_cmd) { case FAN_MARK_ADD: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, - flags, fsid); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, - flags, fsid); - else - ret = fanotify_add_inode_mark(group, inode, mask, - flags, fsid); + ret = fanotify_add_mark(group, obj, obj_type, mask, flags, + fsid); break; case FAN_MARK_REMOVE: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, - flags, umask); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, - flags, umask); - else - ret = fanotify_remove_inode_mark(group, inode, mask, - flags, umask); + ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, + umask); break; default: ret = -EINVAL; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 5c430736ec12..dec553034027 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -41,29 +41,25 @@ static void show_fdinfo(struct seq_file *m, struct file *f, #if defined(CONFIG_EXPORTFS) static void show_mark_fhandle(struct seq_file *m, struct inode *inode) { - struct { - struct file_handle handle; - u8 pad[MAX_HANDLE_SZ]; - } f; + DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ); int size, ret, i; - f.handle.handle_bytes = sizeof(f.pad); - size = f.handle.handle_bytes >> 2; + size = f->handle_bytes >> 2; - ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return; } - f.handle.handle_type = ret; - f.handle.handle_bytes = size * sizeof(u32); + f->handle_type = ret; + f->handle_bytes = size * sizeof(u32); seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", - f.handle.handle_bytes, f.handle.handle_type); + f->handle_bytes, f->handle_type); - for (i = 0; i < f.handle.handle_bytes; i++) - seq_printf(m, "%02x", (int)f.handle.f_handle[i]); + for (i = 0; i < f->handle_bytes; i++) + seq_printf(m, "%02x", (int)f->f_handle[i]); } #else static void show_mark_fhandle(struct seq_file *m, struct inode *inode) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2fc105a72a8f..ff69ae24c4e8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -89,11 +89,25 @@ static void fsnotify_unmount_inodes(struct super_block *sb) void fsnotify_sb_delete(struct super_block *sb) { + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return; + fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ - wait_var_event(&sb->s_fsnotify_connectors, - !atomic_long_read(&sb->s_fsnotify_connectors)); + wait_var_event(fsnotify_sb_watched_objects(sb), + !atomic_long_read(fsnotify_sb_watched_objects(sb))); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT)); +} + +void fsnotify_sb_free(struct super_block *sb) +{ + kfree(sb->s_fsnotify_info); } /* @@ -489,6 +503,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; @@ -525,7 +540,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!sb->s_fsnotify_marks && + if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!inode2 || !inode2->i_fsnotify_marks)) @@ -552,8 +567,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = - fsnotify_first_mark(&sb->s_fsnotify_marks); + if (sbinfo) { + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = + fsnotify_first_mark(&sbinfo->sb_marks); + } if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index fde74eb333cc..2d059f789ee3 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,39 +9,58 @@ #include "../mount.h" +/* + * fsnotify_connp_t is what we embed in objects which connector can be attached + * to. + */ +typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; + static inline struct inode *fsnotify_conn_inode( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct inode, i_fsnotify_marks); + return conn->obj; } static inline struct mount *fsnotify_conn_mount( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct mount, mnt_fsnotify_marks); + return real_mount(conn->obj); } static inline struct super_block *fsnotify_conn_sb( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct super_block, s_fsnotify_marks); + return conn->obj; } -static inline struct super_block *fsnotify_connector_sb( - struct fsnotify_mark_connector *conn) +static inline struct super_block *fsnotify_object_sb(void *obj, + enum fsnotify_obj_type obj_type) { - switch (conn->type) { + switch (obj_type) { case FSNOTIFY_OBJ_TYPE_INODE: - return fsnotify_conn_inode(conn)->i_sb; + return ((struct inode *)obj)->i_sb; case FSNOTIFY_OBJ_TYPE_VFSMOUNT: - return fsnotify_conn_mount(conn)->mnt.mnt_sb; + return ((struct vfsmount *)obj)->mnt_sb; case FSNOTIFY_OBJ_TYPE_SB: - return fsnotify_conn_sb(conn); + return (struct super_block *)obj; default: return NULL; } } +static inline struct super_block *fsnotify_connector_sb( + struct fsnotify_mark_connector *conn) +{ + return fsnotify_object_sb(conn->obj, conn->type); +} + +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + return sbinfo ? &sbinfo->sb_marks : NULL; +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -67,7 +86,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) /* run the list of all marks associated with sb and destroy them */ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) { - fsnotify_destroy_marks(&sb->s_fsnotify_marks); + fsnotify_destroy_marks(fsnotify_sb_marks(sb)); } /* diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 85d8fdd55329..4ffc30606e0b 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -544,7 +544,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int create = (arg & IN_MASK_CREATE); int ret; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); + fsn_mark = fsnotify_find_inode_mark(inode, group); if (!fsn_mark) return -ENOENT; else if (create) { diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d6944ff86ffa..c3eefa70633c 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -97,6 +97,21 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) refcount_inc(&mark->refcnt); } +static fsnotify_connp_t *fsnotify_object_connp(void *obj, + enum fsnotify_obj_type obj_type) +{ + switch (obj_type) { + case FSNOTIFY_OBJ_TYPE_INODE: + return &((struct inode *)obj)->i_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: + return &real_mount(obj)->mnt_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_SB: + return fsnotify_sb_marks(obj); + default: + return NULL; + } +} + static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) @@ -116,10 +131,69 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) return *fsnotify_conn_mask_p(conn); } +static void fsnotify_get_sb_watched_objects(struct super_block *sb) +{ + atomic_long_inc(fsnotify_sb_watched_objects(sb)); +} + +static void fsnotify_put_sb_watched_objects(struct super_block *sb) +{ + if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb))) + wake_up_var(fsnotify_sb_watched_objects(sb)); +} + static void fsnotify_get_inode_ref(struct inode *inode) { ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); + fsnotify_get_sb_watched_objects(inode->i_sb); +} + +static void fsnotify_put_inode_ref(struct inode *inode) +{ + fsnotify_put_sb_watched_objects(inode->i_sb); + iput(inode); +} + +/* + * Grab or drop watched objects reference depending on whether the connector + * is attached and has any marks attached. + */ +static void fsnotify_update_sb_watchers(struct super_block *sb, + struct fsnotify_mark_connector *conn) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED; + struct fsnotify_mark *first_mark = NULL; + unsigned int highest_prio = 0; + + if (conn->obj) + first_mark = hlist_entry_safe(conn->list.first, + struct fsnotify_mark, obj_list); + if (first_mark) + highest_prio = first_mark->group->priority; + if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM)) + highest_prio = 0; + + /* + * If the highest priority of group watching this object is prio, + * then watched object has a reference on counters [0..prio]. + * Update priority >= 1 watched objects counters. + */ + for (unsigned int p = conn->prio + 1; p <= highest_prio; p++) + atomic_long_inc(&sbinfo->watched_objects[p]); + for (unsigned int p = conn->prio; p > highest_prio; p--) + atomic_long_dec(&sbinfo->watched_objects[p]); + conn->prio = highest_prio; + + /* Update priority >= 0 (a.k.a total) watched objects counter */ + BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0); + if (first_mark && !is_watched) { + conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED; + fsnotify_get_sb_watched_objects(sb); + } else if (!first_mark && is_watched) { + conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED; + fsnotify_put_sb_watched_objects(sb); + } } /* @@ -213,35 +287,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_put_inode_ref(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - iput(inode); - if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - -static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb) - atomic_long_inc(&sb->s_fsnotify_connectors); -} - -static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) { + fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type); + struct super_block *sb = fsnotify_connector_sb(conn); struct inode *inode = NULL; *type = conn->type; @@ -261,10 +312,10 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } - fsnotify_put_sb_connectors(conn); - rcu_assign_pointer(*(conn->obj), NULL); + rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -316,6 +367,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { + struct super_block *sb = fsnotify_connector_sb(conn); + + /* Update watched objects after detaching mark */ + if (sb) + fsnotify_update_sb_watchers(sb, conn); objp = __fsnotify_recalc_mask(conn); type = conn->type; } @@ -536,8 +592,28 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +static int fsnotify_attach_info_to_sb(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo; + + /* sb info is freed on fsnotify_sb_delete() */ + sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); + if (!sbinfo) + return -ENOMEM; + + /* + * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() + * will observe an initialized structure + */ + if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { + /* Someone else created sbinfo for us */ + kfree(sbinfo); + } + return 0; +} + static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int obj_type) + void *obj, unsigned int obj_type) { struct fsnotify_mark_connector *conn; @@ -547,10 +623,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); conn->flags = 0; + conn->prio = 0; conn->type = obj_type; - conn->obj = connp; - conn->flags = 0; - fsnotify_get_sb_connectors(conn); + conn->obj = obj; /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -558,10 +633,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } - return 0; } @@ -598,24 +671,36 @@ out: * to which group and for which inodes. These marks are ordered according to * priority, highest number first, and then by the group's location in memory. */ -static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { + struct super_block *sb = fsnotify_object_sb(obj, obj_type); struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; + fsnotify_connp_t *connp; int cmp; int err = 0; if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; + /* + * Attach the sb info before attaching a connector to any object on sb. + * The sb info will remain attached as long as sb lives. + */ + if (!fsnotify_sb_info(sb)) { + err = fsnotify_attach_info_to_sb(sb); + if (err) + return err; + } + + connp = fsnotify_object_connp(obj, obj_type); restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, obj_type); + err = fsnotify_attach_connector_to_object(connp, obj, obj_type); if (err) return err; goto restart; @@ -649,6 +734,7 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone @@ -667,7 +753,7 @@ out_err: * event types should be delivered to which group. */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, int add_flags) { struct fsnotify_group *group = mark->group; @@ -688,7 +774,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags); if (ret) goto err; @@ -706,14 +792,14 @@ err: return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { int ret; struct fsnotify_group *group = mark->group; fsnotify_group_lock(group); - ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags); fsnotify_group_unlock(group); return ret; } @@ -723,12 +809,16 @@ EXPORT_SYMBOL_GPL(fsnotify_add_mark); * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, struct fsnotify_group *group) { + fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type); struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; + if (!connp) + return NULL; + conn = fsnotify_grab_connector(connp); if (!conn) return NULL; diff --git a/fs/super.c b/fs/super.c index 69ce6c600968..b72f1d288e95 100644 --- a/fs/super.c +++ b/fs/super.c @@ -274,6 +274,7 @@ static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, destroy_work); + fsnotify_sb_free(s); security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); diff --git a/include/linux/fs.h b/include/linux/fs.h index a7e7e37ba417..f1cc6d39b61f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -73,6 +73,8 @@ struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; +struct fsnotify_mark_connector; +struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; struct fileattr; @@ -618,8 +620,6 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 -struct fsnotify_mark_connector; - /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -1248,7 +1248,7 @@ struct super_block { /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and - * s_fsnotify_marks together for cache efficiency. They are frequently + * s_fsnotify_info together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ @@ -1260,7 +1260,7 @@ struct super_block { time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; + struct fsnotify_sb_info *s_fsnotify_info; #endif /* @@ -1301,12 +1301,6 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; - /* - * Number of inode/mount/sb objects that are being watched, note that - * inodes objects are currently double-accounted. - */ - atomic_long_t s_fsnotify_connectors; - /* Read-only state of the superblock is being changed */ int s_readonly_remount; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1a9de119a0f7..4da80e92f804 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,10 +17,23 @@ #include <linux/slab.h> #include <linux/bug.h> +/* Are there any inode/mount/sb objects watched with priority prio or above? */ +static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, + int prio) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return false; + + return atomic_long_read(&sbinfo->watched_objects[prio]); +} + /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { - return atomic_long_read(&sb->s_fsnotify_connectors); + return fsnotify_sb_has_priority_watchers(sb, 0); } /* @@ -103,6 +116,12 @@ static inline int fsnotify_file(struct file *file, __u32 mask) return 0; path = &file->f_path; + /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ + if (mask & ALL_FSNOTIFY_PERM_EVENTS && + !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, + FSNOTIFY_PRIO_CONTENT)) + return 0; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8f40c349b228..4dd6143db271 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -177,6 +177,17 @@ struct fsnotify_event { }; /* + * fsnotify group priorities. + * Events are sent in order from highest priority to lowest priority. + */ +enum fsnotify_group_prio { + FSNOTIFY_PRIO_NORMAL = 0, /* normal notifiers, no permissions */ + FSNOTIFY_PRIO_CONTENT, /* fanotify permission events */ + FSNOTIFY_PRIO_PRE_CONTENT, /* fanotify pre-content events */ + __FSNOTIFY_PRIO_NUM +}; + +/* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. * refcnt on a group is up to the implementor and at any moment if it goes 0 @@ -201,14 +212,7 @@ struct fsnotify_group { wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ - /* - * Valid fsnotify group priorities. Events are send in order from highest - * priority to lowest priority. We default to the lowest priority. - */ - #define FS_PRIO_0 0 /* normal notifiers, no permissions */ - #define FS_PRIO_1 1 /* fanotify content based access control */ - #define FS_PRIO_2 2 /* fanotify pre-content access */ - unsigned int priority; + enum fsnotify_group_prio priority; /* priority for sending events */ bool shutdown; /* group is being shut down, don't queue more events */ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ @@ -457,13 +461,6 @@ FSNOTIFY_ITER_FUNCS(sb, SB) type++) /* - * fsnotify_connp_t is what we embed in objects which connector can be attached - * to. fsnotify_connp_t * is how we refer from connector back to object. - */ -struct fsnotify_mark_connector; -typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; - -/* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached @@ -471,12 +468,14 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; */ struct fsnotify_mark_connector { spinlock_t lock; - unsigned short type; /* Type of object [lock] */ + unsigned char type; /* Type of object [lock] */ + unsigned char prio; /* Highest priority group */ +#define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ - fsnotify_connp_t *obj; + void *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; @@ -484,6 +483,37 @@ struct fsnotify_mark_connector { }; /* + * Container for per-sb fsnotify state (sb marks and more). + * Attached lazily on first marked object on the sb and freed when killing sb. + */ +struct fsnotify_sb_info { + struct fsnotify_mark_connector __rcu *sb_marks; + /* + * Number of inode/mount/sb objects that are being watched in this sb. + * Note that inodes objects are currently double-accounted. + * + * The value in watched_objects[prio] is the number of objects that are + * watched by groups of priority >= prio, so watched_objects[0] is the + * total number of watched objects in this sb. + */ + atomic_long_t watched_objects[__FSNOTIFY_PRIO_NUM]; +}; + +static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) +{ +#ifdef CONFIG_FSNOTIFY + return READ_ONCE(sb->s_fsnotify_info); +#else + return NULL; +#endif +} + +static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) +{ + return &fsnotify_sb_info(sb)->watched_objects[0]; +} + +/* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events * of a type matching mask or only interested in those events. @@ -546,6 +576,7 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) @@ -758,30 +789,35 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, - struct fsnotify_group *group); +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, + struct fsnotify_group *group); /* attach the mark to the object */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags); +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark_locked(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); +} + +static inline struct fsnotify_mark *fsnotify_find_inode_mark( + struct inode *inode, + struct fsnotify_group *group) +{ + return fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group); } /* given a group and a mark, flag mark to be freed when all references are dropped */ @@ -845,6 +881,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void fsnotify_sb_free(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 1b07e6f12a07..f2f38903b2fe 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -463,7 +463,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) int n; fsnotify_group_lock(audit_tree_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); + mark = fsnotify_find_inode_mark(inode, audit_tree_group); if (!mark) return create_chunk(inode, tree); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 7a98cd176a12..7f358740e958 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -90,7 +90,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode) struct audit_parent *parent = NULL; struct fsnotify_mark *entry; - entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group); + entry = fsnotify_find_inode_mark(inode, audit_watch_group); if (entry) parent = container_of(entry, struct audit_parent, mark); |