From b502bd1152472dc1b98c60434f23c23b280c7b94 Mon Sep 17 00:00:00 2001 From: Muthu Kumar Date: Fri, 23 Mar 2012 15:01:50 -0700 Subject: magic.h: move some FS magic numbers into magic.h - Move open-coded filesystem magic numbers into magic.h - Rearrange magic.h so that the filesystem-related constants are grouped together. Signed-off-by: Muthukumar R Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 18 ++++++++++++------ include/linux/pipe_fs_i.h | 2 -- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/magic.h b/include/linux/magic.h index b7ed4759dbb2..e15192cb9cf4 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -9,7 +9,6 @@ #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ #define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 -#define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 #define SELINUX_MAGIC 0xf97cff8c #define RAMFS_MAGIC 0x858458f6 /* some random number */ @@ -27,7 +26,6 @@ #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -#define ANON_INODE_FS_MAGIC 0x09041934 #define PSTOREFS_MAGIC 0x6165676C #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ @@ -40,7 +38,6 @@ #define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ #define NFS_SUPER_MAGIC 0x6969 #define OPENPROM_SUPER_MAGIC 0x9fa1 -#define PROC_SUPER_MAGIC 0x9fa0 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ @@ -52,15 +49,24 @@ #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" #define SMB_SUPER_MAGIC 0x517B -#define USBDEVICE_SUPER_MAGIC 0x9fa2 #define CGROUP_SUPER_MAGIC 0x27e0eb -#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define STACK_END_MAGIC 0x57AC6E9D +#define V9FS_MAGIC 0x01021997 + +#define BDEVFS_MAGIC 0x62646576 +#define BINFMTFS_MAGIC 0x42494e4d #define 
DEVPTS_SUPER_MAGIC 0x1cd1 +#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA +#define PIPEFS_MAGIC 0x50495045 +#define PROC_SUPER_MAGIC 0x9fa0 #define SOCKFS_MAGIC 0x534F434B -#define V9FS_MAGIC 0x01021997 +#define SYSFS_MAGIC 0x62656572 +#define USBDEVICE_SUPER_MAGIC 0x9fa2 +#define MTD_INODE_FS_MAGIC 0x11307854 +#define ANON_INODE_FS_MAGIC 0x09041934 + #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 77257c92155a..6d626ff0cfd0 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -1,8 +1,6 @@ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H -#define PIPEFS_MAGIC 0x50495045 - #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ -- cgit v1.2.3 From d314d74c695f967e10598467a326f41c78ed1e20 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: nmi watchdog: do not use cpp symbol in Kconfig ARCH_HAS_NMI_WATCHDOG is a macro defined by arch, but config HARDLOCKUP_DETECTOR depends on it. This is wrong, ARCH_HAS_NMI_WATCHDOG has to be a Kconfig config, and arch's need it should select it explicitly. Signed-off-by: WANG Cong Acked-by: Don Zickus Acked-by: Mike Frysinger Cc: David Howells Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2d304efc89df..db50840e6355 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,7 +14,7 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. 
*/ -#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include extern void touch_nmi_watchdog(void); #else -- cgit v1.2.3 From 10db4e1e4e9a910a26b94045660e5ba7e7c71419 Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: headers: include linux/types.h where appropriate This addresses some header check warnings. DRM headers which include "drm.h" have been excluded, as they indirectly include types.h. Signed-off-by: Bobby Powers Cc: Chris Ball Cc: Dave Airlie Cc: James Bottomley Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/drm/drm_mode.h | 2 ++ include/linux/mmc/ioctl.h | 3 +++ include/scsi/scsi_netlink.h | 2 +- include/sound/compress_params.h | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 2a2acda8b437..4a0aae38e160 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -27,6 +27,8 @@ #ifndef _DRM_MODE_H #define _DRM_MODE_H +#include <linux/types.h> + #define DRM_DISPLAY_INFO_LEN 32 #define DRM_CONNECTOR_NAME_LEN 32 #define DRM_DISPLAY_MODE_LEN 32 diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h index 8fa5bc5f8059..1f5e68923929 100644 --- a/include/linux/mmc/ioctl.h +++ b/include/linux/mmc/ioctl.h @@ -1,5 +1,8 @@ #ifndef LINUX_MMC_IOCTL_H #define LINUX_MMC_IOCTL_H + +#include <linux/types.h> + struct mmc_ioc_cmd { /* Implies direction of data.
true = write, false = read */ int write_flag; diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h index 58ce8fe44783..5cb20ccb1956 100644 --- a/include/scsi/scsi_netlink.h +++ b/include/scsi/scsi_netlink.h @@ -23,7 +23,7 @@ #define SCSI_NETLINK_H #include - +#include <linux/types.h> /* * This file intended to be included by both kernel and user space diff --git a/include/sound/compress_params.h b/include/sound/compress_params.h index d97d69f81a7d..da4a456de032 100644 --- a/include/sound/compress_params.h +++ b/include/sound/compress_params.h @@ -51,6 +51,8 @@ #ifndef __SND_COMPRESS_PARAMS_H #define __SND_COMPRESS_PARAMS_H +#include <linux/types.h> + /* AUDIO CODECS SUPPORTED */ #define MAX_NUM_CODECS 32 #define MAX_NUM_CODEC_DESCRIPTORS 32 -- cgit v1.2.3 From 7ccaba5314caf3a2b1052edb3146ccc969b4d466 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 23 Mar 2012 15:01:52 -0700 Subject: consolidate WARN_...ONCE() static variables Due to the alignment of following variables, these typically consume more than just the single byte that 'bool' requires, and as there are a few hundred instances, the cache pollution (not so much the waste of memory) sums up. Put these variables into their own section, outside of any half way frequently used memory range. Do the same also to the __warned variable of rcu_lockdep_assert(). (Don't, however, include the ones used by printk_once() and alike, as they can potentially be hot.)
Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 6 +++--- include/asm-generic/vmlinux.lds.h | 1 + include/linux/rcupdate.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 84458b0c38d1..2520a6e241dc 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -134,7 +134,7 @@ extern void warn_slowpath_null(const char *file, const int line); #endif #define WARN_ON_ONCE(condition) ({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ @@ -144,7 +144,7 @@ extern void warn_slowpath_null(const char *file, const int line); }) #define WARN_ONCE(condition, format...) ({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ @@ -154,7 +154,7 @@ extern void warn_slowpath_null(const char *file, const int line); }) #define WARN_TAINT_ONCE(condition, taint, format...) 
({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b5e2e4c6b017..798603e8ec38 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -167,6 +167,7 @@ CPU_KEEP(exit.data) \ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ + *(.data.unlikely) \ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 937217425c47..2c62594b67dd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -418,7 +418,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_lockdep_assert(c, s) \ do { \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ -- cgit v1.2.3 From ebec18a6d3aa1e7d84aab16225e87fd25170ec2b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 23 Mar 2012 15:01:54 -0700 Subject: prctl: add PR_{SET,GET}_CHILD_SUBREAPER to allow simple process supervision Userspace service managers/supervisors need to track their started services. Many services daemonize by double-forking and get implicitly re-parented to PID 1. The service manager will no longer be able to receive the SIGCHLD signals for them, and is no longer in charge of reaping the children with wait(). All information about the children is lost at the moment PID 1 cleans up the re-parented processes. With this prctl, a service manager process can mark itself as a sort of 'sub-init', able to stay as the parent for all orphaned processes created by the started services. All SIGCHLD signals will be delivered to the service manager. 
Receiving SIGCHLD and doing wait() is in cases of a service-manager much preferred over any possible asynchronous notification about specific PIDs, because the service manager has full access to the child process data in /proc and the PID can not be re-used until the wait(), the service-manager itself is in charge of, has happened. As a side effect, the relevant parent PID information does not get lost by a double-fork, which results in a more elaborate process tree and 'ps' output: before: # ps afx 253 ? Ss 0:00 /bin/dbus-daemon --system --nofork 294 ? Sl 0:00 /usr/libexec/polkit-1/polkitd 328 ? S 0:00 /usr/sbin/modem-manager 608 ? Sl 0:00 /usr/libexec/colord 658 ? Sl 0:00 /usr/libexec/upowerd 819 ? Sl 0:00 /usr/libexec/imsettings-daemon 916 ? Sl 0:00 /usr/libexec/udisks-daemon 917 ? S 0:00 \_ udisks-daemon: not polling any devices after: # ps afx 294 ? Ss 0:00 /bin/dbus-daemon --system --nofork 426 ? Sl 0:00 \_ /usr/libexec/polkit-1/polkitd 449 ? S 0:00 \_ /usr/sbin/modem-manager 635 ? Sl 0:00 \_ /usr/libexec/colord 705 ? Sl 0:00 \_ /usr/libexec/upowerd 959 ? Sl 0:00 \_ /usr/libexec/udisks-daemon 960 ? S 0:00 | \_ udisks-daemon: not polling any devices 977 ? Sl 0:00 \_ /usr/libexec/packagekitd This prctl is orthogonal to PID namespaces. PID namespaces are isolated from each other, while a service management process usually requires the services to live in the same namespace, to be able to talk to each other. Users of this will be the systemd per-user instance, which provides init-like functionality for the user's login session and D-Bus, which activates bus services on-demand. Both need init-like capabilities to be able to properly keep track of the services they start. Many thanks to Oleg for several rounds of review and insights. 
[akpm@linux-foundation.org: fix comment layout and spelling] [akpm@linux-foundation.org: add lengthy code comment from Oleg] Reviewed-by: Oleg Nesterov Signed-off-by: Lennart Poettering Signed-off-by: Kay Sievers Acked-by: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 3 +++ include/linux/sched.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a0413ac3abe8..e0cfec2490aa 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -121,4 +121,7 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0c147a4260a5..0c3854b0d4b1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -553,6 +553,18 @@ struct signal_struct { int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + /* POSIX.1b Interval Timers */ struct list_head posix_timers; -- cgit v1.2.3 From 7be865ab8634d4ec2a6bdb9459b268cd60e832af Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:01 -0700 Subject: backlight: new backlight driver for LP855x devices This driver supports TI LP8550/LP8551/LP8552/LP8553/LP8556 backlight devices. The brightness can be controlled by the I2C or PWM input. The lp855x driver provides both modes.
For the PWM control, pwm-specific functions can be defined in the platform data. And some information can be read via the sysfs(lp855x device attributes). For details, please refer to Documentation/backlight/lp855x-driver.txt. [axel.lin@gmail.com: add missing mutex_unlock in lp855x_read_byte() error path] [axel.lin@gmail.com: check platform data in lp855x_probe()] [axel.lin@gmail.com: small cleanups] [dan.carpenter@oracle.com: silence a compiler warning] [axel.lin@gmail.com: use id->driver_data to differentiate lp855x chips] [akpm@linux-foundation.org: simplify boolean return expression] Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Axel Lin Signed-off-by: Dan Carpenter Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lp855x.h | 131 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 include/linux/lp855x.h (limited to 'include') diff --git a/include/linux/lp855x.h b/include/linux/lp855x.h new file mode 100644 index 000000000000..781a490a451b --- /dev/null +++ b/include/linux/lp855x.h @@ -0,0 +1,131 @@ +/* + * LP855x Backlight Driver + * + * Copyright (C) 2011 Texas Instruments + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _LP855X_H +#define _LP855X_H + +#define BL_CTL_SHFT (0) +#define BRT_MODE_SHFT (1) +#define BRT_MODE_MASK (0x06) + +/* Enable backlight. 
Only valid when BRT_MODE=10(I2C only) */ +#define ENABLE_BL (1) +#define DISABLE_BL (0) + +#define I2C_CONFIG(id) id ## _I2C_CONFIG +#define PWM_CONFIG(id) id ## _PWM_CONFIG + +/* DEVICE CONTROL register - LP8550 */ +#define LP8550_PWM_CONFIG (LP8550_PWM_ONLY << BRT_MODE_SHFT) +#define LP8550_I2C_CONFIG ((ENABLE_BL << BL_CTL_SHFT) | \ + (LP8550_I2C_ONLY << BRT_MODE_SHFT)) + +/* DEVICE CONTROL register - LP8551 */ +#define LP8551_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8551_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8552 */ +#define LP8552_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8552_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8553 */ +#define LP8553_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8553_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8556 */ +#define LP8556_PWM_CONFIG (LP8556_PWM_ONLY << BRT_MODE_SHFT) +#define LP8556_COMB1_CONFIG (LP8556_COMBINED1 << BRT_MODE_SHFT) +#define LP8556_I2C_CONFIG ((ENABLE_BL << BL_CTL_SHFT) | \ + (LP8556_I2C_ONLY << BRT_MODE_SHFT)) +#define LP8556_COMB2_CONFIG (LP8556_COMBINED2 << BRT_MODE_SHFT) + +/* ROM area boundary */ +#define EEPROM_START (0xA0) +#define EEPROM_END (0xA7) +#define EPROM_START (0xA0) +#define EPROM_END (0xAF) + +enum lp855x_chip_id { + LP8550, + LP8551, + LP8552, + LP8553, + LP8556, +}; + +enum lp855x_brightness_ctrl_mode { + PWM_BASED = 1, + REGISTER_BASED, +}; + +enum lp8550_brighntess_source { + LP8550_PWM_ONLY, + LP8550_I2C_ONLY = 2, +}; + +enum lp8551_brighntess_source { + LP8551_PWM_ONLY = LP8550_PWM_ONLY, + LP8551_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8552_brighntess_source { + LP8552_PWM_ONLY = LP8550_PWM_ONLY, + LP8552_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8553_brighntess_source { + LP8553_PWM_ONLY = LP8550_PWM_ONLY, + LP8553_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8556_brightness_source { + LP8556_PWM_ONLY, + LP8556_COMBINED1, /* pwm + i2c before the shaper block */ + LP8556_I2C_ONLY, + LP8556_COMBINED2, /* pwm + i2c after the shaper 
block */ +}; + +struct lp855x_pwm_data { + void (*pwm_set_intensity) (int brightness, int max_brightness); + int (*pwm_get_intensity) (int max_brightness); +}; + +struct lp855x_rom_data { + u8 addr; + u8 val; +}; + +/** + * struct lp855x_platform_data + * @name : Backlight driver name. If it is not defined, default name is set. + * @mode : brightness control by pwm or lp855x register + * @device_control : value of DEVICE CONTROL register + * @initial_brightness : initial value of backlight brightness + * @pwm_data : platform specific pwm generation functions. + Only valid when mode is PWM_BASED. + * @load_new_rom_data : + 0 : use default configuration data + 1 : update values of eeprom or eprom registers on loading driver + * @size_program : total size of lp855x_rom_data + * @rom_data : list of new eeprom/eprom registers + */ +struct lp855x_platform_data { + char *name; + enum lp855x_brightness_ctrl_mode mode; + u8 device_control; + int initial_brightness; + struct lp855x_pwm_data pwm_data; + u8 load_new_rom_data; + int size_program; + struct lp855x_rom_data *rom_data; +}; + +#endif -- cgit v1.2.3 From 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:03 -0700 Subject: bitops: rename for_each_set_bit_cont() in favor of analogous list.h function This renames for_each_set_bit_cont() to for_each_set_bit_from() because it is analogous to list_for_each_entry_from() in list.h rather than list_for_each_entry_continue(). This doesn't remove for_each_set_bit_cont() for now. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 94300fe46cce..a78e358f0c17 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -27,11 +27,14 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_set_bit_cont(bit, addr, size) \ + for_each_set_bit_from(bit, addr, size) + static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From 0a329d2d5a1dd75273597538cdc33512ee38855e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:04 -0700 Subject: bitops: remove for_each_set_bit_cont() Remove for_each_set_bit_cont() after confirming that no one uses for_each_set_bit_cont() anymore. [sfr@canb.auug.org.au: regmap: cope with bitops API change] Signed-off-by: Akinobu Mita Signed-off-by: Stephen Rothwell Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a78e358f0c17..348b1dca477a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,9 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) -#define for_each_set_bit_cont(bit, addr, size) \ - for_each_set_bit_from(bit, addr, size) - static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From 03f4a8226c2f9c14361f75848d1e93139bab90c4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:04 -0700 Subject: bitops: introduce for_each_clear_bit() Introduce for_each_clear_bit() and for_each_clear_bit_from(). They are similar to for_each_set_bit() and for_each_set_bit_from(), but they iterate over all the cleared bits in a memory region. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H.
Peter Anvin" Cc: David Woodhouse Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Stefano Panella Cc: David Vrabel Cc: Sergei Shtylyov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 348b1dca477a..a3b6b82108b9 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,6 +32,17 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_clear_bit(bit, addr, size) \ + for ((bit) = find_first_zero_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From 5ae4e8a77dc82afcfe8460168ec0b94f4b79a54a Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'name' in the lp5521_led_config The name of each led channel can be configurable. For the compatibility, the name is set to default value(xx:channelN) when 'name' is not defined. 
Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/leds-lp5521.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index fd548d2a8775..e675b8d4c7bf 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -26,6 +26,7 @@ /* See Documentation/leds/leds-lp5521.txt */ struct lp5521_led_config { + char *name; u8 chan_nr; u8 led_current; /* mA x10, 0 if led is not connected */ u8 max_current; -- cgit v1.2.3 From 3b49aacd0e56d5bf1b511f6554f17cd65eb8da64 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'update_config' in the lp5521_platform_data The value of CONFIG register(Addr 08h) is configurable. For supporting this feature, update_config is added in the platform data. If 'update_config' is not defined, the default value is 'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. To define CONFIG register in the platform data, the bit definitions were moved to the header file. Documentation updated : description about 'update_config' and example.
Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/leds-lp5521.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e675b8d4c7bf..e9ab583cac36 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -36,6 +36,18 @@ struct lp5521_led_config { #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 +/* Bits in CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ +#define LP5521_CLK_INT 1 /* Internal clock */ +#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ + struct lp5521_platform_data { struct lp5521_led_config *led_config; u8 num_channels; @@ -44,6 +56,7 @@ struct lp5521_platform_data { void (*release_resources)(void); void (*enable)(bool state); const char *label; + u8 update_config; }; #endif /* __LINUX_LP5521_H */ -- cgit v1.2.3 From 011af7bc7cd188a0310e2d26cdc2cc5d90148b0c Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:09 -0700 Subject: drivers/leds/leds-lp5521.c: support led pattern data The lp5521 has autonomous operation mode without external control. Using lp5521_platform_data, various led patterns can be configurable. For supporting this feature, new functions and device attribute are added. Structure of lp5521_led_pattern: 3 channels are supported - red, green and blue. 
Pattern(s) of each channel and numbers of pattern(s) are defined in the platform data. Pattern data are hex codes which include pattern commands such as set pwm, wait, ramp up/down, branch and so on. Pattern mode functions: * lp5521_clear_program_memory Before running new led pattern, program memory should be cleared. * lp5521_write_program_memory Pattern data updated in the program memory via the i2c. * lp5521_get_pattern Get pattern from predefined in the platform data. * lp5521_run_led_pattern Stop current pattern or run new pattern. Transition time is required between different operation mode. Device attribute - 'led_pattern': To load specific led pattern, new device attribute is added. When the lp5521 driver is unloaded, stop current led pattern mode. Documentation updated : description about how to define the led patterns and example. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/leds-lp5521.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e9ab583cac36..3f071ec019b2 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -32,6 +32,15 @@ struct lp5521_led_config { u8 max_current; }; +struct lp5521_led_pattern { + u8 *r; + u8 *g; + u8 *b; + u8 size_r; + u8 size_g; + u8 size_b; +}; + #define LP5521_CLOCK_AUTO 0 #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 @@ -57,6 +66,8 @@ struct lp5521_platform_data { void (*enable)(bool state); const char *label; u8 update_config; + struct lp5521_led_pattern *patterns; + int num_patterns; }; #endif /* __LINUX_LP5521_H */ -- cgit v1.2.3 From bb982009d3850759d3f4a4c853f9c456c48b6c2d Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:12 -0700 Subject: leds-lm3530: support pwm input mode
* add 'struct lm3530_pwm_data' in the platform data The pwm data is the platform specific functions which generate the pwm. The pwm data is only valid when brightness is pwm input mode. Functions should be implemented by the pwm driver. pwm_set_intensity() : set duty of pwm. pwm_get_intensity() : get the current brightness. * brightness control by pwm If the control mode is pwm, then brightness is changed by the duty of pwm. So pwm platform function should be called in lm3530_brightness_set(). * do not update brightness register when pwm input mode In pwm input mode, brightness register is not used. If any value is updated in this register, then the led will be off. * when input mode is changed, set duty of pwm to 0 if unnecessary. Signed-off-by: Milo(Woogyom) Kim Cc: Linus Walleij Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/led-lm3530.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h index 8eb12357a110..eeae6e742471 100644 --- a/include/linux/led-lm3530.h +++ b/include/linux/led-lm3530.h @@ -72,6 +72,12 @@ enum lm3530_als_mode { LM3530_INPUT_CEIL, /* Max of ALS1 and ALS2 */ }; +/* PWM Platform Specific Data */ +struct lm3530_pwm_data { + void (*pwm_set_intensity) (int brightness, int max_brightness); + int (*pwm_get_intensity) (int max_brightness); +}; + /** * struct lm3530_platform_data * @mode: mode of operation i.e.
Manual, ALS or PWM @@ -87,6 +93,7 @@ enum lm3530_als_mode { * @als_vmin: als input voltage calibrated for max brightness in mV * @als_vmax: als input voltage calibrated for min brightness in mV * @brt_val: brightness value (0-255) + * @pwm_data: PWM control functions (only valid when the mode is PWM) */ struct lm3530_platform_data { enum lm3530_mode mode; @@ -107,6 +114,8 @@ struct lm3530_platform_data { u32 als_vmax; u8 brt_val; + + struct lm3530_pwm_data pwm_data; }; #endif /* _LINUX_LED_LM3530_H__ */ -- cgit v1.2.3 From 6061d949dd984c762ee272a88e77699fa675d1c8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 23 Mar 2012 15:02:16 -0700 Subject: include/ and checkpatch: prefer __scanf to __attribute__((format(scanf,...) It's equivalent to __printf, so prefer __scanf. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 3 ++- include/linux/kernel.h | 8 ++++---- include/xen/xenbus.h | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3fd17c249221..e5834aa24b9e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -87,7 +87,8 @@ */ #define __pure __attribute__((pure)) #define __aligned(x) __attribute__((aligned(x))) -#define __printf(a,b) __attribute__((format(printf,a,b))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d801acb5e680..f2085b541a24 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -328,10 +328,10 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); -extern int 
sscanf(const char *, const char *, ...) - __attribute__ ((format (scanf, 2, 3))); -extern int vsscanf(const char *, const char *, va_list) - __attribute__ ((format (scanf, 2, 0))); +extern __scanf(2, 3) +int sscanf(const char *, const char *, ...); +extern __scanf(2, 0) +int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index e8c599b237c2..0a7515c1e3a4 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -139,9 +139,9 @@ int xenbus_transaction_start(struct xenbus_transaction *t); int xenbus_transaction_end(struct xenbus_transaction t, int abort); /* Single read and scanf: returns -errno or num scanned if > 0. */ +__scanf(4, 5) int xenbus_scanf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(scanf, 4, 5))); + const char *dir, const char *node, const char *fmt, ...); /* Single printf and write: returns -errno or 0. */ __printf(4, 5) -- cgit v1.2.3 From 46c5801eaf86e83cb3a4142ad35188db5011fff0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Mar 2012 15:02:25 -0700 Subject: crc32: bolt on crc32c Reuse the existing crc32 code to stamp out a crc32c implementation. Signed-off-by: Darrick J. 
Wong Cc: Herbert Xu Cc: Bob Pearson Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crc32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 391a259b2cc9..68267b64bb98 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -11,6 +11,8 @@ extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); + #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* -- cgit v1.2.3 From 626cf236608505d376e4799adb4f7eb00a8594af Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 23 Mar 2012 15:02:27 -0700 Subject: poll: add poll_requested_events() and poll_does_not_wait() functions In some cases the poll() implementation in a driver has to do different things depending on the events the caller wants to poll for. An example is when a driver needs to start a DMA engine if the caller polls for POLLIN, but doesn't want to do that if POLLIN is not requested but instead only POLLOUT or POLLPRI is requested. This is something that can happen in the video4linux subsystem among others. Unfortunately, the current epoll/poll/select implementation doesn't provide that information reliably. The poll_table_struct does have it: it has a key field with the event mask. But once a poll() call matches one or more bits of that mask any following poll() calls are passed a NULL poll_table pointer. Also, the eventpoll implementation always left the key field at ~0 instead of using the requested events mask. This was changed in eventpoll.c so the key field now contains the actual events that should be polled for as set by the caller. The solution to the NULL poll_table pointer is to set the qproc field to NULL in poll_table once poll() matches the events, not the poll_table pointer itself. 
That way drivers can obtain the mask through a new poll_requested_events inline. The poll_table_struct can still be NULL since some kernel code calls it internally (netfs_state_poll() in ./drivers/staging/pohmelfs/netfs.h). In that case poll_requested_events() returns ~0 (i.e. all events). Very rarely drivers might want to know whether poll_wait will actually wait. If another earlier file descriptor in the set already matched the events the caller wanted to wait for, then the kernel will return from the select() call without waiting. This might be useful information in order to avoid doing expensive work. A new helper function poll_does_not_wait() is added that drivers can use to detect this situation. This is now used in sock_poll_wait() in include/net/sock.h. This was the only place in the kernel that needed this information. Drivers should no longer access any of the poll_table internals, but use the poll_requested_events() and poll_does_not_wait() access functions instead. In order to enforce that the poll_table fields are now prepended with an underscore and a comment was added warning against using them directly. This required a change in unix_dgram_poll() in unix/af_unix.c which used the key field to get the requested events. It's been replaced by a call to poll_requested_events(). For qproc it was especially important to change its name since the behavior of that field changes with this patch since this function pointer can now be NULL when that wasn't possible in the past. Any driver accessing the qproc or key fields directly will now fail to compile. Some notes regarding the correctness of this patch: the driver's poll() function is called with a 'struct poll_table_struct *wait' argument. This pointer may or may not be NULL, drivers can never rely on it being one or the other as that depends on whether or not an earlier file descriptor in the select()'s fdset matched the requested events. 
There are only three things a driver can do with the wait argument: 1) obtain the key field: events = wait ? wait->key : ~0; This will still work although it should be replaced with the new poll_requested_events() function (which does exactly the same). This will now even work better, since wait is no longer set to NULL unnecessarily. 2) use the qproc callback. This could be deadly since qproc can now be NULL. Renaming qproc should prevent this from happening. There are no kernel drivers that actually access this callback directly, BTW. 3) test whether wait == NULL to determine whether poll would return without waiting. This is no longer sufficient as the correct test is now wait == NULL || wait->_qproc == NULL. However, the worst that can happen here is a slight performance hit in the case where wait != NULL and wait->_qproc == NULL. In that case the driver will assume that poll_wait() will actually add the fd to the set of waiting file descriptors. Of course, poll_wait() will not do that since it tests for wait->_qproc. This will not break anything, though. There is only one place in the whole kernel where this happens (sock_poll_wait() in include/net/sock.h) and that code will be replaced by a call to poll_does_not_wait() in the next patch. Note that even if wait->_qproc != NULL drivers cannot rely on poll_wait() actually waiting. The next file descriptor from the set might match the event mask and thus any possible waits will never happen. 
Signed-off-by: Hans Verkuil Reviewed-by: Jonathan Corbet Reviewed-by: Al Viro Cc: Davide Libenzi Signed-off-by: Hans de Goede Cc: Mauro Carvalho Chehab Cc: David Miller Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 37 +++++++++++++++++++++++++++++++------ include/net/sock.h | 2 +- 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/poll.h b/include/linux/poll.h index cf40010ce0cd..48fe8bc398d1 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -32,21 +32,46 @@ struct poll_table_struct; */ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); +/* + * Do not touch the structure directly, use the access functions + * poll_does_not_wait() and poll_requested_events() instead. + */ typedef struct poll_table_struct { - poll_queue_proc qproc; - unsigned long key; + poll_queue_proc _qproc; + unsigned long _key; } poll_table; static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { - if (p && wait_address) - p->qproc(filp, wait_address, p); + if (p && p->_qproc && wait_address) + p->_qproc(filp, wait_address, p); +} + +/* + * Return true if it is guaranteed that poll will not wait. This is the case + * if the poll() of another file descriptor in the set got an event, so there + * is no need for waiting. + */ +static inline bool poll_does_not_wait(const poll_table *p) +{ + return p == NULL || p->_qproc == NULL; +} + +/* + * Return the set of events that the application wants to poll for. + * This is useful for drivers that need to know whether a DMA transfer has + * to be started implicitly on poll(). You typically only want to do that + * if the application is actually polling for POLLIN and/or POLLOUT. + */ +static inline unsigned long poll_requested_events(const poll_table *p) +{ + return p ? 
p->_key : ~0UL; } static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) { - pt->qproc = qproc; - pt->key = ~0UL; /* all events enabled */ + pt->_qproc = qproc; + pt->_key = ~0UL; /* all events enabled */ } struct poll_table_entry { diff --git a/include/net/sock.h b/include/net/sock.h index 04bc0b30e9e9..a6ba1f8871fd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1854,7 +1854,7 @@ static inline bool wq_has_sleeper(struct socket_wq *wq) static inline void sock_poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - if (p && wait_address) { + if (!poll_does_not_wait(p) && wait_address) { poll_wait(filp, wait_address, p); /* * We need to be sure we are in sync with the -- cgit v1.2.3 From 15cab952139404d0e593cb1aaab0a3547ac0f95b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:39 -0700 Subject: ptrace: the killed tracee should not enter the syscall Another old/known problem. If the tracee is killed after it reports syscall_entry, it starts the syscall and debugger can't control this. This confuses the users and this creates the security problems for ptrace jailers. Change tracehook_report_syscall_entry() to return non-zero if killed, this instructs syscall_trace_enter() to abort the syscall. Reported-by: Chris Evans Tested-by: Indan Zupancic Signed-off-by: Oleg Nesterov Cc: Denys Vlasenko Cc: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index a71a2927a6a0..51bd91d911c3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -54,12 +54,12 @@ struct linux_binprm; /* * ptrace report for syscall entry and exit looks identical. 
*/ -static inline void ptrace_report_syscall(struct pt_regs *regs) +static inline int ptrace_report_syscall(struct pt_regs *regs) { int ptrace = current->ptrace; if (!(ptrace & PT_PTRACED)) - return; + return 0; ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); @@ -72,6 +72,8 @@ static inline void ptrace_report_syscall(struct pt_regs *regs) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + + return fatal_signal_pending(current); } /** @@ -96,8 +98,7 @@ static inline void ptrace_report_syscall(struct pt_regs *regs) static inline __must_check int tracehook_report_syscall_entry( struct pt_regs *regs) { - ptrace_report_syscall(regs); - return 0; + return ptrace_report_syscall(regs); } /** -- cgit v1.2.3 From b1845ff53f1a9eadba005ae53dfe60ab00dfe83b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:40 -0700 Subject: ptrace: don't send SIGTRAP on exec if SEIZED ptrace_event(PTRACE_EVENT_EXEC) sends SIGTRAP if PT_TRACE_EXEC is not set. This is because this SIGTRAP predates PTRACE_O_TRACEEXEC option, we do not need/want this with PT_SEIZED which can set the options during attach. 
Suggested-by: Pedro Alves Signed-off-by: Oleg Nesterov Cc: Chris Evans Cc: Indan Zupancic Cc: Denys Vlasenko Cc: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c2f1f6a5fcb8..6fdb196caa3e 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -199,9 +199,10 @@ static inline void ptrace_event(int event, unsigned long message) if (unlikely(ptrace_event_enabled(current, event))) { current->ptrace_message = message; ptrace_notify((event << 8) | SIGTRAP); - } else if (event == PTRACE_EVENT_EXEC && unlikely(current->ptrace)) { + } else if (event == PTRACE_EVENT_EXEC) { /* legacy EXEC report via SIGTRAP */ - send_sig(SIGTRAP, current, 0); + if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) + send_sig(SIGTRAP, current, 0); } } -- cgit v1.2.3 From 86b6c1f301faf085de5a3f9ce16b8de6e69c729b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 23 Mar 2012 15:02:41 -0700 Subject: ptrace: simplify PTRACE_foo constants and PTRACE_SETOPTIONS code Exchange PT_TRACESYSGOOD and PT_PTRACE_CAP bit positions, which makes PT_option bits contiguous and therefore makes code in ptrace_setoptions() much simpler. Every PTRACE_O_TRACEevent is defined to (1 << PTRACE_EVENT_event) instead of using explicit numeric constants, to ensure we don't mess up relationship between bit positions and event ids. PT_EVENT_FLAG_SHIFT was not particularly useful, PT_OPT_FLAG_SHIFT with value of PT_EVENT_FLAG_SHIFT-1 is easier to use. PT_TRACE_MASK constant is nuked, the only its use is replaced by (PTRACE_O_MASK << PT_OPT_FLAG_SHIFT). 
Signed-off-by: Denys Vlasenko Acked-by: Tejun Heo Reviewed-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6fdb196caa3e..6f1260ee5be5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,17 +54,6 @@ /* flags in @data for PTRACE_SEIZE */ #define PTRACE_SEIZE_DEVEL 0x80000000 /* temp flag for development */ -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 -#define PTRACE_O_TRACEFORK 0x00000002 -#define PTRACE_O_TRACEVFORK 0x00000004 -#define PTRACE_O_TRACECLONE 0x00000008 -#define PTRACE_O_TRACEEXEC 0x00000010 -#define PTRACE_O_TRACEVFORKDONE 0x00000020 -#define PTRACE_O_TRACEEXIT 0x00000040 - -#define PTRACE_O_MASK 0x0000007f - /* Wait extended result codes for the above trace options. 
*/ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 @@ -74,6 +63,17 @@ #define PTRACE_EVENT_EXIT 6 #define PTRACE_EVENT_STOP 7 +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 1 +#define PTRACE_O_TRACEFORK (1 << PTRACE_EVENT_FORK) +#define PTRACE_O_TRACEVFORK (1 << PTRACE_EVENT_VFORK) +#define PTRACE_O_TRACECLONE (1 << PTRACE_EVENT_CLONE) +#define PTRACE_O_TRACEEXEC (1 << PTRACE_EVENT_EXEC) +#define PTRACE_O_TRACEVFORKDONE (1 << PTRACE_EVENT_VFORK_DONE) +#define PTRACE_O_TRACEEXIT (1 << PTRACE_EVENT_EXIT) + +#define PTRACE_O_MASK 0x0000007f + #include #ifdef __KERNEL__ @@ -88,13 +88,12 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_TRACESYSGOOD 0x00000004 -#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */ +#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ +#define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ -#define PT_EVENT_FLAG_SHIFT 4 -#define PT_EVENT_FLAG(event) (1 << (PT_EVENT_FLAG_SHIFT + (event) - 1)) - +#define PT_EVENT_FLAG(event) (1 << (PT_OPT_FLAG_SHIFT + (event))) +#define PT_TRACESYSGOOD PT_EVENT_FLAG(0) #define PT_TRACE_FORK PT_EVENT_FLAG(PTRACE_EVENT_FORK) #define PT_TRACE_VFORK PT_EVENT_FLAG(PTRACE_EVENT_VFORK) #define PT_TRACE_CLONE PT_EVENT_FLAG(PTRACE_EVENT_CLONE) @@ -102,8 +101,6 @@ #define PT_TRACE_VFORK_DONE PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE) #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) -#define PT_TRACE_MASK 0x000003f4 - /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 #define PT_SINGLESTEP (1< Date: Fri, 23 Mar 2012 15:02:42 -0700 Subject: ptrace: renumber PTRACE_EVENT_STOP so that future new options and events can match PTRACE_EVENT_foo and PTRACE_O_TRACEfoo used to match. New PTRACE_EVENT_STOP is the first event which has no corresponding PTRACE_O_TRACE option. 
If we will ever want to add another such option, its PTRACE_EVENT's value will collide with PTRACE_EVENT_STOP's value. This patch changes PTRACE_EVENT_STOP value to prevent this. While at it, added a comment - the one atop PTRACE_EVENT block, saying "Wait extended result codes for the above trace options", is not true for PTRACE_EVENT_STOP. Signed-off-by: Denys Vlasenko Cc: Tejun Heo Reviewed-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6f1260ee5be5..30be18064dfd 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -61,7 +61,8 @@ #define PTRACE_EVENT_EXEC 4 #define PTRACE_EVENT_VFORK_DONE 5 #define PTRACE_EVENT_EXIT 6 -#define PTRACE_EVENT_STOP 7 +/* Extended result codes which enabled by means other than options. */ +#define PTRACE_EVENT_STOP 128 /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 1 -- cgit v1.2.3 From ee00560c7dac1dbbf048446a8489550d0a5765b7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 23 Mar 2012 15:02:43 -0700 Subject: ptrace: remove PTRACE_SEIZE_DEVEL bit PTRACE_SEIZE code is tested and ready for production use, remove the code which requires special bit in data argument to make PTRACE_SEIZE work. Strace team prepares for a new release of strace, and we would like to ship the code which uses PTRACE_SEIZE, preferably after this change goes into released kernel. 
Signed-off-by: Denys Vlasenko Acked-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 30be18064dfd..407c678d2e30 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -51,9 +51,6 @@ #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 -/* flags in @data for PTRACE_SEIZE */ -#define PTRACE_SEIZE_DEVEL 0x80000000 /* temp flag for development */ - /* Wait extended result codes for the above trace options. */ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 @@ -64,7 +61,7 @@ /* Extended result codes which enabled by means other than options. */ #define PTRACE_EVENT_STOP 128 -/* options set using PTRACE_SETOPTIONS */ +/* Options set using PTRACE_SETOPTIONS or using PTRACE_SEIZE @data param */ #define PTRACE_O_TRACESYSGOOD 1 #define PTRACE_O_TRACEFORK (1 << PTRACE_EVENT_FORK) #define PTRACE_O_TRACEVFORK (1 << PTRACE_EVENT_VFORK) -- cgit v1.2.3 From d0bd587a80960d7ba7e0c8396e154028c9045c54 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:47 -0700 Subject: usermodehelper: implement UMH_KILLABLE Implement UMH_KILLABLE, should be used along with UMH_WAIT_EXEC/PROC. The caller must ensure that subprocess_info->path/etc can not go away until call_usermodehelper_freeinfo(). call_usermodehelper_exec(UMH_KILLABLE) does wait_for_completion_killable. If it fails, it uses xchg(&sub_info->complete, NULL) to serialize with umh_complete() which does the same xchg() to access sub_info->complete. If call_usermodehelper_exec wins, it can safely return. umh_complete() should get NULL and call call_usermodehelper_freeinfo().
Otherwise we know that umh_complete() was already called, in this case call_usermodehelper_exec() falls back to wait_for_completion() which should succeed "very soon". Note: UMH_NO_WAIT == -1 but it obviously should not be used with UMH_KILLABLE. We delay the necessary cleanup to simplify the back porting. Signed-off-by: Oleg Nesterov Cc: Tetsuo Handa Cc: Rusty Russell Cc: Tejun Heo Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 722f477c4ef7..1b5985855ffc 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -54,6 +54,8 @@ enum umh_wait { UMH_WAIT_PROC = 1, /* wait for the process to complete */ }; +#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ + struct subprocess_info { struct work_struct work; struct completion *complete; -- cgit v1.2.3 From 9d944ef32e83405a07376f112e9f02161d3e9731 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:48 -0700 Subject: usermodehelper: kill umh_wait, renumber UMH_* constants No functional changes. It is not sane to use UMH_KILLABLE with enum umh_wait, but obviously we do not want another argument in call_usermodehelper_* helpers. Kill this enum, use the plain int. Signed-off-by: Oleg Nesterov Cc: Tetsuo Handa Cc: Rusty Russell Cc: Tejun Heo Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 1b5985855ffc..9efeae679106 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -48,12 +48,9 @@ static inline int request_module_nowait(const char *name, ...)
{ return -ENOSYS; struct cred; struct file; -enum umh_wait { - UMH_NO_WAIT = -1, /* don't wait at all */ - UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ - UMH_WAIT_PROC = 1, /* wait for the process to complete */ -}; - +#define UMH_NO_WAIT 0 /* don't wait at all */ +#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 2 /* wait for the process to complete */ #define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ struct subprocess_info { @@ -62,7 +59,7 @@ struct subprocess_info { char *path; char **argv; char **envp; - enum umh_wait wait; + int wait; int retval; int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); @@ -80,15 +77,14 @@ void call_usermodehelper_setfns(struct subprocess_info *info, void *data); /* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); +int call_usermodehelper_exec(struct subprocess_info *info, int wait); /* Free the subprocess_info. 
This is only needed if you're not going to call call_usermodehelper_exec */ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper_fns(char *path, char **argv, char **envp, - enum umh_wait wait, +call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { @@ -106,7 +102,7 @@ call_usermodehelper_fns(char *path, char **argv, char **envp, } static inline int -call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) +call_usermodehelper(char *path, char **argv, char **envp, int wait) { return call_usermodehelper_fns(path, argv, envp, wait, NULL, NULL, NULL); -- cgit v1.2.3 From 909af768e88867016f427264ae39d27a57b6a8ed Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process, to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region.
The qemu code which implements this feature is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. I also believe that the 'MADV_DONTDUMP' flag might be useful for security sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one of these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(). Thus, freeing a valuable vma bit. Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7330742e7973..2de2ddba51d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,7 +111,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ -#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ -- cgit v1.2.3 From accb61fe7bb0f5c2a4102239e4981650f9048519 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: add VM_NODUMP, MADV_NODUMP, MADV_CLEAR_NODUMP Since we no longer need the VM_ALWAYSDUMP flag, let's use the freed bit for 'VM_NODUMP' flag. The idea is to add a new madvise() flag: MADV_DONTDUMP, which can be set by applications to specifically request memory regions which should not dump core.
The specific application I have in mind is qemu: we can add a flag there that wouldn't dump all of guest memory when qemu dumps core. This flag might also be useful for security sensitive apps that want to absolutely make sure that parts of memory are not dumped. To clear the flag use: MADV_DODUMP. [akpm@linux-foundation.org: s/MADV_NODUMP/MADV_DONTDUMP/, s/MADV_CLEAR_NODUMP/MADV_DODUMP/, per Roland] [akpm@linux-foundation.org: fix up the architectures which broke] Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Cc: Ralf Baechle Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/mman-common.h | 4 ++++ include/linux/mm.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 787abbb6d867..d030d2c2647a 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -48,6 +48,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 2de2ddba51d4..a6fabdfd34c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,6 +111,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_NODUMP 0x04000000 /* Do not include in the core dump */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and 
pure PFN pages */ -- cgit v1.2.3 From 1ac101a5d675aca2426c5cd460c73fb95acb8391 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 23 Mar 2012 15:02:54 -0700 Subject: procfs: add num_to_str() to speed up /proc/stat == stat_check.py num = 0 with open("/proc/stat") as f: while num < 1000 : data = f.read() f.seek(0, 0) num = num + 1 == perf shows 20.39% stat_check.py [kernel.kallsyms] [k] format_decode 13.41% stat_check.py [kernel.kallsyms] [k] number 12.61% stat_check.py [kernel.kallsyms] [k] vsnprintf 10.85% stat_check.py [kernel.kallsyms] [k] memcpy 4.85% stat_check.py [kernel.kallsyms] [k] radix_tree_lookup 4.43% stat_check.py [kernel.kallsyms] [k] seq_printf This patch removes most of the calls to vsnprintf() by adding num_to_str() and seq_put_decimal_ull(), which print decimal numbers without rich functions provided by printf(). On my 8cpu box. == Before patch == [root@bluextal test]# time ./stat_check.py real 0m0.150s user 0m0.026s sys 0m0.121s == After patch == [root@bluextal test]# time ./stat_check.py real 0m0.055s user 0m0.022s sys 0m0.030s [akpm@linux-foundation.org: remove incorrect comment, use less stack in num_to_str(), move comment from .h to .c, simplify seq_put_decimal_ull()] [andrea@betterlinux.com: avoid breaking the ABI in /proc/stat] Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrea Righi Cc: Eric Dumazet Cc: Glauber Costa Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul Turner Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ include/linux/seq_file.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2085b541a24..3e140add5360 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -312,6 +312,8 @@ extern long long simple_strtoll(const char *,char **,unsigned int); #define strict_strtoull kstrtoull #define strict_strtoll kstrtoll +extern int num_to_str(char *buf, int size,
unsigned long long num); + /* lib/printf utilities */ extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 44f1514b00ba..5bba42c99448 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -121,9 +121,10 @@ int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); +int seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num); #define SEQ_START_TOKEN ((void *)1) - /* * Helpers for iteration over list_head-s in seq_files */ -- cgit v1.2.3 From bda7bad62bc4c4e0783348e8db51abe094153c56 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 23 Mar 2012 15:02:54 -0700 Subject: procfs: speed up /proc/pid/stat, statm Process accounting applications such as top, ps visit some files under /proc/<pid>. With seq_put_decimal_ull(), we can optimize /proc/<pid>/stat and /proc/<pid>/statm files. This patch adds - seq_put_decimal_ll() for signed values. - allow delimiter == 0. - convert seq_printf() to seq_put_decimal_ull/ll in /proc/stat, statm. Test result on a system with 2000+ procs. Before patch: [kamezawa@bluextal test]$ top -b -n 1 | wc -l 2223 [kamezawa@bluextal test]$ time top -b -n 1 > /dev/null real 0m0.675s user 0m0.044s sys 0m0.121s [kamezawa@bluextal test]$ time ps -elf > /dev/null real 0m0.236s user 0m0.056s sys 0m0.176s After patch: [kamezawa@bluextal ~]$ time top -b -n 1 > /dev/null real 0m0.657s user 0m0.052s sys 0m0.100s [kamezawa@bluextal ~]$ time ps -elf > /dev/null real 0m0.198s user 0m0.050s sys 0m0.145s Considering top, ps tend to scan /proc periodically, this will reduce cpu consumption by top/ps to some extent.
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 5bba42c99448..54e5ae7f8adc 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,8 @@ int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); int seq_put_decimal_ull(struct seq_file *m, char delimiter, unsigned long long num); +int seq_put_decimal_ll(struct seq_file *m, char delimiter, + long long num); #define SEQ_START_TOKEN ((void *)1) /* -- cgit v1.2.3