diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/array.c | 20 | ||||
-rw-r--r-- | fs/proc/base.c | 269 | ||||
-rw-r--r-- | fs/proc/fd.c | 8 | ||||
-rw-r--r-- | fs/proc/generic.c | 2 | ||||
-rw-r--r-- | fs/proc/inode.c | 15 | ||||
-rw-r--r-- | fs/proc/internal.h | 3 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 25 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 3 | ||||
-rw-r--r-- | fs/proc/page.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 23 | ||||
-rw-r--r-- | fs/proc/root.c | 58 | ||||
-rw-r--r-- | fs/proc/stat.c | 10 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 21 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 2 |
16 files changed, 260 insertions, 204 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 7151ea428041..12c6922c913c 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -4,6 +4,7 @@ obj-y += proc.o +CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,) proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := task_mmu.o diff --git a/fs/proc/array.c b/fs/proc/array.c index b6c00ce0e29e..88c7de12197b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -83,6 +83,7 @@ #include <linux/tracehook.h> #include <linux/string_helpers.h> #include <linux/user_namespace.h> +#include <linux/fs_struct.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -139,12 +140,25 @@ static inline const char *get_task_state(struct task_struct *tsk) return task_state_array[fls(state)]; } +static inline int get_task_umask(struct task_struct *tsk) +{ + struct fs_struct *fs; + int umask = -ENOENT; + + task_lock(tsk); + fs = tsk->fs; + if (fs) + umask = fs->umask; + task_unlock(tsk); + return umask; +} + static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; - int g; + int g, umask; struct task_struct *tracer; const struct cred *cred; pid_t ppid, tpid = 0, tgid, ngid; @@ -162,6 +176,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, ngid = task_numa_group_id(p); cred = get_task_cred(p); + umask = get_task_umask(p); + if (umask >= 0) + seq_printf(m, "Umask:\t%#04o\n", umask); + task_lock(p); if (p->files) max_fds = files_fdtable(p->files)->max_fds; diff --git a/fs/proc/base.c b/fs/proc/base.c index da8b1943ba04..ac0df4dde823 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -579,11 +579,8 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, unsigned long totalpages = totalram_pages + total_swap_pages; unsigned long points = 0; - read_lock(&tasklist_lock); - if (pid_alive(task)) - points = oom_badness(task, NULL, NULL, totalpages) * - 1000 / totalpages; - read_unlock(&tasklist_lock); + points = oom_badness(task, NULL, NULL, totalpages) * + 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; @@ -1024,23 +1021,107 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, char buffer[PROC_NUMBUF]; int oom_adj = OOM_ADJUST_MIN; size_t len; - unsigned long flags; if (!task) return -ESRCH; - if (lock_task_sighand(task, &flags)) { - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) - oom_adj = OOM_ADJUST_MAX; - else - oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / - OOM_SCORE_ADJ_MAX; - unlock_task_sighand(task, &flags); - } + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } +static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) +{ + static DEFINE_MUTEX(oom_adj_mutex); + struct mm_struct *mm = NULL; + struct task_struct *task; + int err = 0; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mutex_lock(&oom_adj_mutex); + if (legacy) { + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_unlock; + } + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + } else { + if ((short)oom_adj < task->signal->oom_score_adj_min && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_unlock; + } + } + + /* + * Make sure we will check other processes sharing the mm if this is + * not vfrok which wants its own oom_score_adj. + * pin the mm so it doesn't go away and get reused after task_unlock + */ + if (!task->vfork_done) { + struct task_struct *p = find_lock_task_mm(task); + + if (p) { + if (atomic_read(&p->mm->mm_users) > 1) { + mm = p->mm; + atomic_inc(&mm->mm_count); + } + task_unlock(p); + } + } + + task->signal->oom_score_adj = oom_adj; + if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) + task->signal->oom_score_adj_min = (short)oom_adj; + trace_oom_score_adj_update(task); + + if (mm) { + struct task_struct *p; + + rcu_read_lock(); + for_each_process(p) { + if (same_thread_group(task, p)) + continue; + + /* do not touch kernel threads or the global init */ + if (p->flags & PF_KTHREAD || is_global_init(p)) + continue; + + task_lock(p); + if (!p->vfork_done && process_shares_mm(p, mm)) { + pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n", + task_pid_nr(p), p->comm, + p->signal->oom_score_adj, oom_adj, + task_pid_nr(task), task->comm); + p->signal->oom_score_adj = oom_adj; + if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) + p->signal->oom_score_adj_min = (short)oom_adj; + } + task_unlock(p); + } + rcu_read_unlock(); + mmdrop(mm); + } +err_unlock: + mutex_unlock(&oom_adj_mutex); + put_task_struct(task); + return err; +} + /* * /proc/pid/oom_adj exists solely for backwards compatibility with previous * kernels. The effective policy is defined by oom_score_adj, which has a @@ -1054,10 +1135,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task; char buffer[PROC_NUMBUF]; int oom_adj; - unsigned long flags; int err; memset(buffer, 0, sizeof(buffer)); @@ -1077,23 +1156,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, goto out; } - task = get_proc_task(file_inode(file)); - if (!task) { - err = -ESRCH; - goto out; - } - - task_lock(task); - if (!task->mm) { - err = -EINVAL; - goto err_task_lock; - } - - if (!lock_task_sighand(task, &flags)) { - err = -ESRCH; - goto err_task_lock; - } - /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum * value is always attainable. @@ -1103,27 +1165,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, else oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; - if (oom_adj < task->signal->oom_score_adj && - !capable(CAP_SYS_RESOURCE)) { - err = -EACCES; - goto err_sighand; - } - - /* - * /proc/pid/oom_adj is provided for legacy purposes, ask users to use - * /proc/pid/oom_score_adj instead. - */ - pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), task_pid_nr(task), - task_pid_nr(task)); - - task->signal->oom_score_adj = oom_adj; - trace_oom_score_adj_update(task); -err_sighand: - unlock_task_sighand(task, &flags); -err_task_lock: - task_unlock(task); - put_task_struct(task); + err = __set_oom_adj(file, oom_adj, true); out: return err < 0 ? err : count; } @@ -1140,15 +1182,11 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; short oom_score_adj = OOM_SCORE_ADJ_MIN; - unsigned long flags; size_t len; if (!task) return -ESRCH; - if (lock_task_sighand(task, &flags)) { - oom_score_adj = task->signal->oom_score_adj; - unlock_task_sighand(task, &flags); - } + oom_score_adj = task->signal->oom_score_adj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); @@ -1157,9 +1195,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task; char buffer[PROC_NUMBUF]; - unsigned long flags; int oom_score_adj; int err; @@ -1180,39 +1216,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto out; } - task = get_proc_task(file_inode(file)); - if (!task) { - err = -ESRCH; - goto out; - } - - task_lock(task); - if (!task->mm) { - err = -EINVAL; - goto err_task_lock; - } - - if (!lock_task_sighand(task, &flags)) { - err = -ESRCH; - goto err_task_lock; - } - - if ((short)oom_score_adj < task->signal->oom_score_adj_min && - !capable(CAP_SYS_RESOURCE)) { - err = -EACCES; - goto err_sighand; - } - - task->signal->oom_score_adj = (short)oom_score_adj; - if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) - task->signal->oom_score_adj_min = (short)oom_score_adj; - trace_oom_score_adj_update(task); - -err_sighand: - unlock_task_sighand(task, &flags); -err_task_lock: - task_unlock(task); - put_task_struct(task); + err = __set_oom_adj(file, oom_score_adj, false); out: return err < 0 ? err : count; } @@ -1815,12 +1819,17 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, child = d_hash_and_lookup(dir, &qname); if (!child) { - child = d_alloc(dir, &qname); - if (!child) - goto end_instantiate; - if (instantiate(d_inode(dir), child, task, ptr) < 0) { - dput(child); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + child = d_alloc_parallel(dir, &qname, &wq); + if (IS_ERR(child)) goto end_instantiate; + if (d_in_lookup(child)) { + int err = instantiate(d_inode(dir), child, task, ptr); + d_lookup_done(child); + if (err < 0) { + dput(child); + goto end_instantiate; + } } } inode = d_inode(child); @@ -2150,8 +2159,8 @@ out: static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, - .iterate = proc_map_files_readdir, - .llseek = default_llseek, + .iterate_shared = proc_map_files_readdir, + .llseek = generic_file_llseek, }; #ifdef CONFIG_CHECKPOINT_RESTORE @@ -2498,8 +2507,8 @@ static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, - .iterate = proc_attr_dir_readdir, - .llseek = default_llseek, + .iterate_shared = proc_attr_dir_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, @@ -2906,8 +2915,8 @@ static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, - .iterate = proc_tgid_base_readdir, - .llseek = default_llseek, + .iterate_shared = proc_tgid_base_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -3153,6 +3162,44 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) } /* + * proc_tid_comm_permission is a special permission function exclusively + * used for the node /proc/<pid>/task/<tid>/comm. + * It bypasses generic permission checks in the case where a task of the same + * task group attempts to access the node. + * The rationale behind this is that glibc and bionic access this node for + * cross thread naming (pthread_set/getname_np(!self)). However, if + * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, + * which locks out the cross thread naming implementation. + * This function makes sure that the node is always accessible for members of + * same thread group. + */ +static int proc_tid_comm_permission(struct inode *inode, int mask) +{ + bool is_same_tgroup; + struct task_struct *task; + + task = get_proc_task(inode); + if (!task) + return -ESRCH; + is_same_tgroup = same_thread_group(current, task); + put_task_struct(task); + + if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { + /* This file (/proc/<pid>/task/<tid>/comm) can always be + * read or written by the members of the corresponding + * thread group. + */ + return 0; + } + + return generic_permission(inode, mask); +} + +static const struct inode_operations proc_tid_comm_inode_operations = { + .permission = proc_tid_comm_permission, +}; + +/* * Tasks */ static const struct pid_entry tid_base_stuff[] = { @@ -3170,7 +3217,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif - REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), + NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, + &proc_tid_comm_inode_operations, + &proc_pid_set_comm_operations, {}), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif @@ -3254,8 +3303,8 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, - .iterate = proc_tid_base_readdir, - .llseek = default_llseek, + .iterate_shared = proc_tid_base_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { @@ -3465,6 +3514,6 @@ static const struct inode_operations proc_task_inode_operations = { static const struct file_operations proc_task_operations = { .read = generic_read_dir, - .iterate = proc_task_readdir, - .llseek = default_llseek, + .iterate_shared = proc_task_readdir, + .llseek = generic_file_llseek, }; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 56afa5ef08f2..01df23cc81f6 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -276,8 +276,8 @@ static int proc_readfd(struct file *file, struct dir_context *ctx) const struct file_operations proc_fd_operations = { .read = generic_read_dir, - .iterate = proc_readfd, - .llseek = default_llseek, + .iterate_shared = proc_readfd, + .llseek = generic_file_llseek, }; static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, @@ -361,6 +361,6 @@ const struct inode_operations proc_fdinfo_inode_operations = { const struct file_operations proc_fdinfo_operations = { .read = generic_read_dir, - .iterate = proc_readfdinfo, - .llseek = default_llseek, + .iterate_shared = proc_readfdinfo, + .llseek = generic_file_llseek, }; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ff3ffc76a937..c633476616e0 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -318,7 +318,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = proc_readdir, + .iterate_shared = proc_readdir, }; /* diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 42305ddcbaa0..c1b72388e571 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -457,17 +457,30 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return inode; } -int proc_fill_super(struct super_block *s) +int proc_fill_super(struct super_block *s, void *data, int silent) { + struct pid_namespace *ns = get_pid_ns(s->s_fs_info); struct inode *root_inode; int ret; + if (!proc_parse_options(data, ns)) + return -EINVAL; + + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; s->s_op = &proc_sops; s->s_time_gran = 1; + + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa2781095bd1..7931c558c192 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -212,7 +212,7 @@ extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *); +extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -268,6 +268,7 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; +extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); extern int proc_remount(struct super_block *, int *, char *); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 83720460c5bc..b9a8c813e5e6 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -40,13 +40,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) si_swapinfo(&i); committed = percpu_counter_read_positive(&vm_committed_as); - cached = global_page_state(NR_FILE_PAGES) - + cached = global_node_page_state(NR_FILE_PAGES) - total_swapcache_pages() - i.bufferram; if (cached < 0) cached = 0; for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) - pages[lru] = global_page_state(NR_LRU_BASE + lru); + pages[lru] = global_node_page_state(NR_LRU_BASE + lru); available = si_mem_available(); @@ -105,6 +105,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE "AnonHugePages: %8lu kB\n" + "ShmemHugePages: %8lu kB\n" + "ShmemPmdMapped: %8lu kB\n" #endif #ifdef CONFIG_CMA "CmaTotal: %8lu kB\n" @@ -136,23 +138,23 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif K(i.totalswap), K(i.freeswap), - K(global_page_state(NR_FILE_DIRTY)), - K(global_page_state(NR_WRITEBACK)), - K(global_page_state(NR_ANON_PAGES)), - K(global_page_state(NR_FILE_MAPPED)), + K(global_node_page_state(NR_FILE_DIRTY)), + K(global_node_page_state(NR_WRITEBACK)), + K(global_node_page_state(NR_ANON_MAPPED)), + K(global_node_page_state(NR_FILE_MAPPED)), K(i.sharedram), K(global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), - global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, + global_page_state(NR_KERNEL_STACK_KB), K(global_page_state(NR_PAGETABLE)), #ifdef CONFIG_QUICKLIST K(quicklist_total_size()), #endif - K(global_page_state(NR_UNSTABLE_NFS)), + K(global_node_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), - K(global_page_state(NR_WRITEBACK_TEMP)), + K(global_node_page_state(NR_WRITEBACK_TEMP)), K(vm_commit_limit()), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, @@ -162,8 +164,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE - , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR) + , K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR) + , K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR) + , K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR) #endif #ifdef CONFIG_CMA , K(totalcma_pages) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 72cb26f85d58..51b8b0a8ad91 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -139,7 +139,8 @@ out: const struct file_operations proc_ns_dir_operations = { .read = generic_read_dir, - .iterate = proc_ns_dir_readdir, + .iterate_shared = proc_ns_dir_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, diff --git a/fs/proc/page.c b/fs/proc/page.c index 712f1b9992cc..3ecd445e830d 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -142,7 +142,7 @@ u64 stable_page_flags(struct page *page) /* - * Caveats on high order pages: page->_count will only be set + * Caveats on high order pages: page->_refcount will only be set * -1 on the head page; SLUB/SLQB do the same for PG_slab; * SLOB won't set PG_slab at all on compound pages. */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 350984a19c83..c8bbc68cdb05 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -179,7 +179,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) const struct file_operations proc_net_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = proc_tgid_net_readdir, + .iterate_shared = proc_tgid_net_readdir, }; static __net_init int proc_net_ns_init(struct net *net) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fe5b6e6c4671..1b93650dda2f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -474,7 +474,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; - struct qstr *name = &dentry->d_name; + const struct qstr *name = &dentry->d_name; struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); @@ -623,22 +623,23 @@ static bool proc_sys_fill_cache(struct file *file, qname.name = table->procname; qname.len = strlen(table->procname); - qname.hash = full_name_hash(qname.name, qname.len); + qname.hash = full_name_hash(dir, qname.name, qname.len); child = d_lookup(dir, &qname); if (!child) { - child = d_alloc(dir, &qname); - if (child) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + child = d_alloc_parallel(dir, &qname, &wq); + if (IS_ERR(child)) + return false; + if (d_in_lookup(child)) { inode = proc_sys_make_inode(dir->d_sb, head, table); if (!inode) { + d_lookup_done(child); dput(child); return false; - } else { - d_set_d_op(child, &proc_sys_dentry_operations); - d_add(child, inode); } - } else { - return false; + d_set_d_op(child, &proc_sys_dentry_operations); + d_add(child, inode); } } inode = d_inode(child); @@ -789,7 +790,7 @@ static const struct file_operations proc_sys_file_operations = { static const struct file_operations proc_sys_dir_file_operations = { .read = generic_read_dir, - .iterate = proc_sys_readdir, + .iterate_shared = proc_sys_readdir, .llseek = generic_file_llseek, }; @@ -833,7 +834,7 @@ static int sysctl_is_seen(struct ctl_table_header *p) return res; } -static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, +static int proc_sys_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct ctl_table_header *head; diff --git a/fs/proc/root.c b/fs/proc/root.c index 361ab4ee42fc..8d3e484055a6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -23,21 +23,6 @@ #include "internal.h" -static int proc_test_super(struct super_block *sb, void *data) -{ - return sb->s_fs_info == data; -} - -static int proc_set_super(struct super_block *sb, void *data) -{ - int err = set_anon_super(sb, NULL); - if (!err) { - struct pid_namespace *ns = (struct pid_namespace *)data; - sb->s_fs_info = get_pid_ns(ns); - } - return err; -} - enum { Opt_gid, Opt_hidepid, Opt_err, }; @@ -48,7 +33,7 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; -static int proc_parse_options(char *options, struct pid_namespace *pid) +int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -100,45 +85,16 @@ int proc_remount(struct super_block *sb, int *flags, char *data) static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - int err; - struct super_block *sb; struct pid_namespace *ns; - char *options; if (flags & MS_KERNMOUNT) { - ns = (struct pid_namespace *)data; - options = NULL; + ns = data; + data = NULL; } else { ns = task_active_pid_ns(current); - options = data; - - /* Does the mounter have privilege over the pid namespace? */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - } - - sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); - if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (!proc_parse_options(options, ns)) { - deactivate_locked_super(sb); - return ERR_PTR(-EINVAL); - } - - if (!sb->s_root) { - err = proc_fill_super(sb); - if (err) { - deactivate_locked_super(sb); - return ERR_PTR(err); - } - - sb->s_flags |= MS_ACTIVE; - /* User space would break if executables appear on proc */ - sb->s_iflags |= SB_I_NOEXEC; } - return dget(sb->s_root); + return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); } static void proc_kill_sb(struct super_block *sb) @@ -158,7 +114,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -226,8 +182,8 @@ static int proc_root_readdir(struct file *file, struct dir_context *ctx) */ static const struct file_operations proc_root_operations = { .read = generic_read_dir, - .iterate = proc_root_readdir, - .llseek = default_llseek, + .iterate_shared = proc_root_readdir, + .llseek = generic_file_llseek, }; /* diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 510413eb25b8..7907e456ac4f 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -80,19 +80,17 @@ static u64 get_iowait_time(int cpu) static int show_stat(struct seq_file *p, void *v) { int i, j; - unsigned long jif; u64 user, nice, system, idle, iowait, irq, softirq, steal; u64 guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; - struct timespec boottime; + struct timespec64 boottime; user = nice = system = idle = iowait = irq = softirq = steal = 0; guest = guest_nice = 0; - getboottime(&boottime); - jif = boottime.tv_sec; + getboottime64(&boottime); for_each_possible_cpu(i) { user += kcpustat_cpu(i).cpustat[CPUTIME_USER]; @@ -163,12 +161,12 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "\nctxt %llu\n" - "btime %lu\n" + "btime %llu\n" "processes %lu\n" "procs_running %lu\n" "procs_blocked %lu\n", nr_context_switches(), - (unsigned long)jif, + (unsigned long long)boottime.tv_sec, total_forks, nr_running(), nr_iowait()); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 541583510cfb..187d84ef9de9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -448,6 +448,7 @@ struct mem_size_stats { unsigned long referenced; unsigned long anonymous; unsigned long anonymous_thp; + unsigned long shmem_thp; unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; @@ -576,7 +577,12 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); if (IS_ERR_OR_NULL(page)) return; - mss->anonymous_thp += HPAGE_PMD_SIZE; + if (PageAnon(page)) + mss->anonymous_thp += HPAGE_PMD_SIZE; + else if (PageSwapBacked(page)) + mss->shmem_thp += HPAGE_PMD_SIZE; + else + VM_BUG_ON_PAGE(1, page); smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); } #else @@ -770,6 +776,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) "Referenced: %8lu kB\n" "Anonymous: %8lu kB\n" "AnonHugePages: %8lu kB\n" + "ShmemPmdMapped: %8lu kB\n" "Shared_Hugetlb: %8lu kB\n" "Private_Hugetlb: %7lu kB\n" "Swap: %8lu kB\n" @@ -787,6 +794,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss.referenced >> 10, mss.anonymous >> 10, mss.anonymous_thp >> 10, + mss.shmem_thp >> 10, mss.shared_hugetlb >> 10, mss.private_hugetlb >> 10, mss.swap >> 10, @@ -1027,11 +1035,15 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, }; if (type == CLEAR_REFS_MM_HIWATER_RSS) { + if (down_write_killable(&mm->mmap_sem)) { + count = -EINTR; + goto out_mm; + } + /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ - down_write(&mm->mmap_sem); reset_mm_hiwater_rss(mm); up_write(&mm->mmap_sem); goto out_mm; @@ -1043,7 +1055,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); + if (down_write_killable(&mm->mmap_sem)) { + count = -EINTR; + goto out_mm; + } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 8afe10cf7df8..8ab782d8b33d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1071,7 +1071,7 @@ static int __init parse_crash_elf32_headers(void) /* Do some basic Verification. */ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || (ehdr.e_type != ET_CORE) || - !elf_check_arch(&ehdr) || + !vmcore_elf32_check_arch(&ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS32|| ehdr.e_ident[EI_VERSION] != EV_CURRENT || ehdr.e_version != EV_CURRENT || |