diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 4 | ||||
-rw-r--r-- | fs/afs/file.c | 2 | ||||
-rw-r--r-- | fs/dcache.c | 29 | ||||
-rw-r--r-- | fs/debugfs/inode.c | 149 | ||||
-rw-r--r-- | fs/devpts/inode.c | 85 | ||||
-rw-r--r-- | fs/exec.c | 14 | ||||
-rw-r--r-- | fs/namei.c | 122 | ||||
-rw-r--r-- | fs/proc/base.c | 3 | ||||
-rw-r--r-- | fs/sysfs/dir.c | 224 | ||||
-rw-r--r-- | fs/sysfs/inode.c | 11 | ||||
-rw-r--r-- | fs/sysfs/mount.c | 2 | ||||
-rw-r--r-- | fs/sysfs/sysfs.h | 17 |
12 files changed, 526 insertions, 136 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index d621f02a3f9e..aa195265362f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,10 @@ menu "File systems" +# Use unaligned word dcache accesses +config DCACHE_WORD_ACCESS + bool + if BLOCK source "fs/ext2/Kconfig" diff --git a/fs/afs/file.c b/fs/afs/file.c index 14d89fa58fee..8f6e9234d565 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping, ASSERT(key != NULL); vnode = AFS_FS_I(mapping->host); - if (vnode->flags & AFS_VNODE_DELETED) { + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { _leave(" = -ESTALE"); return -ESTALE; } diff --git a/fs/dcache.c b/fs/dcache.c index bcbdb33fcc20..11828de68dce 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -105,10 +105,10 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; static inline struct hlist_bl_head *d_hash(const struct dentry *parent, - unsigned long hash) + unsigned int hash) { - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + hash += (unsigned long) parent / L1_CACHE_BYTES; + hash = hash + (hash >> D_HASHBITS); return dentry_hashtable + (hash & D_HASHMASK); } @@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, static inline int dentry_cmp(const unsigned char *cs, size_t scount, const unsigned char *ct, size_t tcount) { +#ifdef CONFIG_DCACHE_WORD_ACCESS + unsigned long a,b,mask; + + if (unlikely(scount != tcount)) + return 1; + + for (;;) { + a = *(unsigned long *)cs; + b = *(unsigned long *)ct; + if (tcount < sizeof(unsigned long)) + break; + if (unlikely(a != b)) + return 1; + cs += sizeof(unsigned long); + ct += sizeof(unsigned long); + tcount -= sizeof(unsigned long); + if (!tcount) + return 0; + } + mask = ~(~0ul << tcount*8); + return unlikely(!!((a ^ b) & mask)); +#else if (scount != tcount) return 1; @@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, tcount--; } while (tcount); return 0; +#endif } static void __d_free(struct rcu_head *head) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 956d5ddddf6e..b80bc846a15a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -23,9 +23,13 @@ #include <linux/debugfs.h> #include <linux/fsnotify.h> #include <linux/string.h> +#include <linux/seq_file.h> +#include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> +#define DEBUGFS_DEFAULT_MODE 0755 + static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; @@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } +struct debugfs_mount_opts { + uid_t uid; + gid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, + Opt_gid, + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct debugfs_fs_info { + struct debugfs_mount_opts mount_opts; +}; + +static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + char *p; + + opts->mode = DEBUGFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + opts->uid = option; + break; + case Opt_gid: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->gid = option; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally debugfs has ignored all mount options + */ + } + } + + return 0; +} + +static int debugfs_apply_options(struct super_block *sb) +{ + struct debugfs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct debugfs_mount_opts *opts = &fsi->mount_opts; + + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; + + return 0; +} + +static int debugfs_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct debugfs_fs_info *fsi = sb->s_fs_info; + + err = debugfs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + debugfs_apply_options(sb); + +fail: + return err; +} + +static int debugfs_show_options(struct seq_file *m, struct dentry *root) +{ + struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; + struct debugfs_mount_opts *opts = &fsi->mount_opts; + + if (opts->uid != 0) + seq_printf(m, ",uid=%u", opts->uid); + if (opts->gid != 0) + seq_printf(m, ",gid=%u", opts->gid); + if (opts->mode != DEBUGFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", opts->mode); + + return 0; +} + +static const struct super_operations debugfs_super_operations = { + .statfs = simple_statfs, + .remount_fs = debugfs_remount, + .show_options = debugfs_show_options, +}; + static int debug_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr debug_files[] = {{""}}; + struct debugfs_fs_info *fsi; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) { + err = -ENOMEM; + goto fail; + } + + err = debugfs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); + if (err) + goto fail; + + sb->s_op = &debugfs_super_operations; + + debugfs_apply_options(sb); + + return 0; - return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); +fail: + kfree(fsi); + sb->s_fs_info = NULL; + return err; } static struct dentry *debug_mount(struct file_system_type *fs_type, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c4e2a58a2e82..1c6f908e38ca 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -36,7 +36,61 @@ #define DEVPTS_DEFAULT_PTMX_MODE 0000 #define PTMX_MINOR 2 -extern int pty_limit; /* Config limit on Unix98 ptys */ +/* + * sysctl support for setting limits on the number of Unix98 ptys allocated. + * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly. + */ +static int pty_limit = NR_UNIX98_PTY_DEFAULT; +static int pty_reserve = NR_UNIX98_PTY_RESERVE; +static int pty_limit_min; +static int pty_limit_max = INT_MAX; +static int pty_count; + +static struct ctl_table pty_table[] = { + { + .procname = "max", + .maxlen = sizeof(int), + .mode = 0644, + .data = &pty_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pty_limit_min, + .extra2 = &pty_limit_max, + }, { + .procname = "reserve", + .maxlen = sizeof(int), + .mode = 0644, + .data = &pty_reserve, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pty_limit_min, + .extra2 = &pty_limit_max, + }, { + .procname = "nr", + .maxlen = sizeof(int), + .mode = 0444, + .data = &pty_count, + .proc_handler = proc_dointvec, + }, + {} +}; + +static struct ctl_table pty_kern_table[] = { + { + .procname = "pty", + .mode = 0555, + .child = pty_table, + }, + {} +}; + +static struct ctl_table pty_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = pty_kern_table, + }, + {} +}; + static DEFINE_MUTEX(allocated_ptys_lock); static struct vfsmount *devpts_mnt; @@ -49,10 +103,11 @@ struct pts_mount_opts { umode_t mode; umode_t ptmxmode; int newinstance; + int max; }; enum { - Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, + Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_max, Opt_err }; @@ -63,6 +118,7 @@ static const match_table_t tokens = { #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, + {Opt_max, "max=%d"}, #endif {Opt_err, NULL} }; @@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->gid = 0; opts->mode = DEVPTS_DEFAULT_MODE; opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + opts->max = NR_UNIX98_PTY_MAX; /* newinstance makes sense only on initial mount */ if (op == PARSE_MOUNT) @@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) if (op == PARSE_MOUNT) opts->newinstance = 1; break; + case Opt_max: + if (match_int(&args[0], &option) || + option < 0 || option > NR_UNIX98_PTY_MAX) + return -EINVAL; + opts->max = option; + break; #endif default: printk(KERN_ERR "devpts: called with bogus options\n"); @@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",mode=%03o", opts->mode); #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); + if (opts->max < NR_UNIX98_PTY_MAX) + seq_printf(seq, ",max=%d", opts->max); #endif return 0; @@ -438,6 +503,12 @@ retry: return -ENOMEM; mutex_lock(&allocated_ptys_lock); + if (pty_count >= pty_limit - + (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + mutex_unlock(&allocated_ptys_lock); + return -ENOSPC; + } + ida_ret = ida_get_new(&fsi->allocated_ptys, &index); if (ida_ret < 0) { mutex_unlock(&allocated_ptys_lock); @@ -446,11 +517,12 @@ retry: return -EIO; } - if (index >= pty_limit) { + if (index >= fsi->mount_opts.max) { ida_remove(&fsi->allocated_ptys, index); mutex_unlock(&allocated_ptys_lock); - return -EIO; + return -ENOSPC; } + pty_count++; mutex_unlock(&allocated_ptys_lock); return index; } @@ -462,6 +534,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_lock(&allocated_ptys_lock); ida_remove(&fsi->allocated_ptys, idx); + pty_count--; mutex_unlock(&allocated_ptys_lock); } @@ -558,11 +631,15 @@ void devpts_pty_kill(struct tty_struct *tty) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); + struct ctl_table_header *table; + if (!err) { + table = register_sysctl_table(pty_root_table); devpts_mnt = kern_mount(&devpts_fs_type); if (IS_ERR(devpts_mnt)) { err = PTR_ERR(devpts_mnt); unregister_filesystem(&devpts_fs_type); + unregister_sysctl_table(table); } } return err; diff --git a/fs/exec.c b/fs/exec.c index 153dee14fe55..95551c6da090 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -63,6 +63,8 @@ #include <trace/events/task.h> #include "internal.h" +#include <trace/events/sched.h> + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; @@ -848,6 +850,7 @@ static int exec_mmap(struct mm_struct *mm) if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); + setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); return 0; @@ -975,8 +978,8 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: - if (current->mm) - setmax_mm_hiwater_rss(&sig->maxrss, current->mm); + /* we have changed execution domain */ + tsk->exit_signal = SIGCHLD; exit_itimers(sig); flush_itimer_signals(); @@ -1402,9 +1405,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->recursion_depth = depth; if (retval >= 0) { - if (depth == 0) - ptrace_event(PTRACE_EVENT_EXEC, - old_pid); + if (depth == 0) { + trace_sched_process_exec(current, old_pid, bprm); + ptrace_event(PTRACE_EVENT_EXEC, old_pid); + } put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/fs/namei.c b/fs/namei.c index 46ea9cc16647..fa96a26d3291 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode) return 1; } +/* + * We can do the critical dentry name comparison and hashing + * operations one word at a time, but we are limited to: + * + * - Architectures with fast unaligned word accesses. We could + * do a "get_unaligned()" if this helps and is sufficiently + * fast. + * + * - Little-endian machines (so that we can generate the mask + * of low bytes efficiently). Again, we *could* do a byte + * swapping load on big-endian architectures if that is not + * expensive enough to make the optimization worthless. + * + * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we + * do not trap on the (extremely unlikely) case of a page + * crossing operation. + * + * - Furthermore, we need an efficient 64-bit compile for the + * 64-bit case in order to generate the "number of bytes in + * the final mask". Again, that could be replaced with a + * efficient population count instruction or similar. + */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608 >> 56; +} + +static inline unsigned int fold_hash(unsigned long hash) +{ + hash += hash >> (8*sizeof(int)); + return hash; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#define fold_hash(x) (x) + +#endif + +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long a, mask; + unsigned long hash = 0; + + for (;;) { + a = *(unsigned long *)name; + hash *= 9; + if (len < sizeof(unsigned long)) + break; + hash += a; + name += sizeof(unsigned long); + len -= sizeof(unsigned long); + if (!len) + goto done; + } + mask = ~(~0ul << len*8); + hash += mask & a; +done: + return fold_hash(hash); +} +EXPORT_SYMBOL(full_name_hash); + +#define ONEBYTES 0x0101010101010101ul +#define SLASHBYTES 0x2f2f2f2f2f2f2f2ful +#define HIGHBITS 0x8080808080808080ul + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - ONEBYTES) & ~a) & HIGHBITS; +} + +/* + * Calculate the length and hash of the path component, and + * return the length of the component; + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long a, mask, hash, len; + + hash = a = 0; + len = -sizeof(unsigned long); + do { + hash = (hash + a) * 9; + len += sizeof(unsigned long); + a = *(unsigned long *)(name+len); + /* Do we have any NUL or '/' bytes in this word? */ + mask = has_zero(a) | has_zero(a ^ SLASHBYTES); + } while (!mask); + + /* The mask *below* the first high bit set */ + mask = (mask - 1) & ~mask; + mask >>= 7; + hash += a & mask; + *hashp = fold_hash(hash); + + return len + count_masked_bytes(mask); +} + +#else + unsigned int full_name_hash(const unsigned char *name, unsigned int len) { unsigned long hash = init_name_hash(); @@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) return len; } +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into diff --git a/fs/proc/base.c b/fs/proc/base.c index d4548dd49b02..965d4bde3a3b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf, if (!p) return -ESRCH; - err = nice; - err = proc_sched_autogroup_set_nice(p, &err); + err = proc_sched_autogroup_set_nice(p, nice); if (err) count = err; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 7fdf6a7b7436..2a7a3f5d1ca6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -22,76 +22,103 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/security.h> +#include <linux/hash.h> #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); DEFINE_SPINLOCK(sysfs_assoc_lock); +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb); + static DEFINE_SPINLOCK(sysfs_ino_lock); static DEFINE_IDA(sysfs_ino_ida); /** - * sysfs_link_sibling - link sysfs_dirent into sibling list + * sysfs_name_hash + * @ns: Namespace tag to hash + * @name: Null terminated string to hash + * + * Returns 31 bit hash of ns + name (so it fits in an off_t ) + */ +static unsigned int sysfs_name_hash(const void *ns, const char *name) +{ + unsigned long hash = init_name_hash(); + unsigned int len = strlen(name); + while (len--) + hash = partial_name_hash(*name++, hash); + hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) ); + hash &= 0x7fffffffU; + /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ + if (hash < 1) + hash += 2; + if (hash >= INT_MAX) + hash = INT_MAX - 1; + return hash; +} + +static int sysfs_name_compare(unsigned int hash, const void *ns, + const char *name, const struct sysfs_dirent *sd) +{ + if (hash != sd->s_hash) + return hash - sd->s_hash; + if (ns != sd->s_ns) + return ns - sd->s_ns; + return strcmp(name, sd->s_name); +} + +static int sysfs_sd_compare(const struct sysfs_dirent *left, + const struct sysfs_dirent *right) +{ + return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name, + right); +} + +/** + * sysfs_link_subling - link sysfs_dirent into sibling rbtree * @sd: sysfs_dirent of interest * - * Link @sd into its sibling list which starts from + * Link @sd into its sibling rbtree which starts from * sd->s_parent->s_dir.children. * * Locking: * mutex_lock(sysfs_mutex) + * + * RETURNS: + * 0 on susccess -EEXIST on failure. */ -static void sysfs_link_sibling(struct sysfs_dirent *sd) +static int sysfs_link_sibling(struct sysfs_dirent *sd) { - struct sysfs_dirent *parent_sd = sd->s_parent; - - struct rb_node **p; - struct rb_node *parent; + struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; + struct rb_node *parent = NULL; if (sysfs_type(sd) == SYSFS_DIR) - parent_sd->s_dir.subdirs++; - - p = &parent_sd->s_dir.inode_tree.rb_node; - parent = NULL; - while (*p) { - parent = *p; -#define node rb_entry(parent, struct sysfs_dirent, inode_node) - if (sd->s_ino < node->s_ino) { - p = &node->inode_node.rb_left; - } else if (sd->s_ino > node->s_ino) { - p = &node->inode_node.rb_right; - } else { - printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", - (unsigned long) sd->s_ino); - BUG(); - } -#undef node - } - rb_link_node(&sd->inode_node, parent, p); - rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree); - - p = &parent_sd->s_dir.name_tree.rb_node; - parent = NULL; - while (*p) { - int c; - parent = *p; -#define node rb_entry(parent, struct sysfs_dirent, name_node) - c = strcmp(sd->s_name, node->s_name); - if (c < 0) { - p = &node->name_node.rb_left; - } else { - p = &node->name_node.rb_right; - } -#undef node + sd->s_parent->s_dir.subdirs++; + + while (*node) { + struct sysfs_dirent *pos; + int result; + + pos = to_sysfs_dirent(*node); + parent = *node; + result = sysfs_sd_compare(sd, pos); + if (result < 0) + node = &pos->s_rb.rb_left; + else if (result > 0) + node = &pos->s_rb.rb_right; + else + return -EEXIST; } - rb_link_node(&sd->name_node, parent, p); - rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); + /* add new node and rebalance the tree */ + rb_link_node(&sd->s_rb, parent, node); + rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); + return 0; } /** - * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree * @sd: sysfs_dirent of interest * - * Unlink @sd from its sibling list which starts from + * Unlink @sd from its sibling rbtree which starts from * sd->s_parent->s_dir.children. * * Locking: @@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) if (sysfs_type(sd) == SYSFS_DIR) sd->s_parent->s_dir.subdirs--; - rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); - rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree); + rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); } /** @@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd) rwsem_release(&sd->dep_map, 1, _RET_IP_); } -static int sysfs_alloc_ino(ino_t *pino) +static int sysfs_alloc_ino(unsigned int *pino) { int ino, rc; @@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino) return rc; } -static void sysfs_free_ino(ino_t ino) +static void sysfs_free_ino(unsigned int ino) { spin_lock(&sysfs_ino_lock); ida_remove(&sysfs_ino_ida, ino); @@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; + int ret; if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", @@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) return -EINVAL; } - if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) - return -EEXIST; - + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); sd->s_parent = sysfs_get(acxt->parent_sd); - sysfs_link_sibling(sd); + ret = sysfs_link_sibling(sd); + if (ret) + return ret; /* Update timestamps on the parent */ ps_iattr = acxt->parent_sd->s_iattr; @@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, const void *ns, const unsigned char *name) { - struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; - struct sysfs_dirent *found = NULL; + struct rb_node *node = parent_sd->s_dir.children.rb_node; + unsigned int hash; if (!!sysfs_ns_type(parent_sd) != !!ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", @@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, return NULL; } - while (p) { - int c; -#define node rb_entry(p, struct sysfs_dirent, name_node) - c = strcmp(name, node->s_name); - if (c < 0) { - p = node->name_node.rb_left; - } else if (c > 0) { - p = node->name_node.rb_right; - } else { - found = node; - p = node->name_node.rb_left; - } -#undef node - } - - if (found) { - while (found->s_ns != ns) { - p = rb_next(&found->name_node); - if (!p) - return NULL; - found = rb_entry(p, struct sysfs_dirent, name_node); - if (strcmp(name, found->s_name)) - return NULL; - } + hash = sysfs_name_hash(ns, name); + while (node) { + struct sysfs_dirent *sd; + int result; + + sd = to_sysfs_dirent(node); + result = sysfs_name_compare(hash, ns, name, sd); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return sd; } - - return found; + return NULL; } /** @@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); sysfs_addrm_start(&acxt, dir_sd); - pos = rb_first(&dir_sd->s_dir.inode_tree); + pos = rb_first(&dir_sd->s_dir.children); while (pos) { - struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); + struct sysfs_dirent *sd = to_sysfs_dirent(pos); pos = rb_next(pos); if (sysfs_type(sd) != SYSFS_DIR) sysfs_remove_one(&acxt, sd); @@ -863,6 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd, dup_name = sd->s_name; sd->s_name = new_name; + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); } /* Move to the appropriate place in the appropriate directories rbtree. */ @@ -919,38 +935,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp) } static struct sysfs_dirent *sysfs_dir_pos(const void *ns, - struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) + struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) { if (pos) { int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && pos->s_parent == parent_sd && - ino == pos->s_ino; + hash == pos->s_hash; sysfs_put(pos); if (!valid) pos = NULL; } - if (!pos && (ino > 1) && (ino < INT_MAX)) { - struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; - while (p) { -#define node rb_entry(p, struct sysfs_dirent, inode_node) - if (ino < node->s_ino) { - pos = node; - p = node->inode_node.rb_left; - } else if (ino > node->s_ino) { - p = node->inode_node.rb_right; - } else { - pos = node; + if (!pos && (hash > 1) && (hash < INT_MAX)) { + struct rb_node *node = parent_sd->s_dir.children.rb_node; + while (node) { + pos = to_sysfs_dirent(node); + + if (hash < pos->s_hash) + node = node->rb_left; + else if (hash > pos->s_hash) + node = node->rb_right; + else break; - } -#undef node } } + /* Skip over entries in the wrong namespace */ while (pos && pos->s_ns != ns) { - struct rb_node *p = rb_next(&pos->inode_node); - if (!p) + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) pos = NULL; else - pos = rb_entry(p, struct sysfs_dirent, inode_node); + pos = to_sysfs_dirent(node); } return pos; } @@ -960,11 +974,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, { pos = sysfs_dir_pos(ns, parent_sd, ino, pos); if (pos) do { - struct rb_node *p = rb_next(&pos->inode_node); - if (!p) + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) pos = NULL; else - pos = rb_entry(p, struct sysfs_dirent, inode_node); + pos = to_sysfs_dirent(node); } while (pos && pos->s_ns != ns); return pos; } @@ -1006,7 +1020,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) len = strlen(name); ino = pos->s_ino; type = dt_type(pos); - filp->f_pos = ino; + filp->f_pos = pos->s_hash; filp->private_data = sysfs_get(pos); mutex_unlock(&sysfs_mutex); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 85eb81683a29..feb2d69396cf 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec void *old_secdata; size_t old_secdata_len; - iattrs = sd->s_iattr; - if (!iattrs) - iattrs = sysfs_init_inode_attrs(sd); - if (!iattrs) - return -ENOMEM; + if (!sd->s_iattr) { + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + } + iattrs = sd->s_iattr; old_secdata = iattrs->ia_secdata; old_secdata_len = iattrs->ia_secdata_len; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index e34f0d99ea4e..140f26a34288 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = { .s_name = "", .s_count = ATOMIC_INIT(1), .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), - .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + .s_mode = S_IFDIR | S_IRUGO | S_IXUGO, .s_ino = 1, }; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 7484a36ee678..661a9639570b 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -20,9 +20,8 @@ struct sysfs_elem_dir { struct kobject *kobj; unsigned long subdirs; - - struct rb_root inode_tree; - struct rb_root name_tree; + /* children rbtree starts here and goes through sd->s_rb */ + struct rb_root children; }; struct sysfs_elem_symlink { @@ -62,8 +61,7 @@ struct sysfs_dirent { struct sysfs_dirent *s_parent; const char *s_name; - struct rb_node inode_node; - struct rb_node name_node; + struct rb_node s_rb; union { struct completion *completion; @@ -71,6 +69,7 @@ struct sysfs_dirent { } u; const void *s_ns; /* namespace tag */ + unsigned int s_hash; /* ns + name hash */ union { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; @@ -78,9 +77,9 @@ struct sysfs_dirent { struct sysfs_elem_bin_attr s_bin_attr; }; - unsigned int s_flags; + unsigned short s_flags; umode_t s_mode; - ino_t s_ino; + unsigned int s_ino; struct sysfs_inode_attrs *s_iattr; }; @@ -95,11 +94,11 @@ struct sysfs_dirent { #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) /* identify any namespace tag on sysfs_dirents */ -#define SYSFS_NS_TYPE_MASK 0xff00 +#define SYSFS_NS_TYPE_MASK 0xf00 #define SYSFS_NS_TYPE_SHIFT 8 #define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) -#define SYSFS_FLAG_REMOVED 0x020000 +#define SYSFS_FLAG_REMOVED 0x02000 static inline unsigned int sysfs_type(struct sysfs_dirent *sd) { |