summaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-09-15 15:08:05 +0200
committerJiri Kosina <jkosina@suse.cz>2011-09-15 15:08:18 +0200
commite060c38434b2caa78efe7cedaff4191040b65a15 (patch)
tree407361230bf6733f63d8e788e4b5e6566ee04818 /ipc
parent10e4ac572eeffe5317019bd7330b6058a400dfc2 (diff)
parentcc39c6a9bbdebfcf1a7dee64d83bf302bc38d941 (diff)
downloadlinux-e060c38434b2caa78efe7cedaff4191040b65a15.tar.gz
linux-e060c38434b2caa78efe7cedaff4191040b65a15.tar.bz2
linux-e060c38434b2caa78efe7cedaff4191040b65a15.zip
Merge branch 'master' into for-next
Fast-forward merge with Linus to be able to merge patches based on more recent version of the tree.
Diffstat (limited to 'ipc')
-rw-r--r--ipc/ipc_sysctl.c36
-rw-r--r--ipc/mqueue.c125
-rw-r--r--ipc/shm.c117
3 files changed, 212 insertions, 66 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 56410faa4550..00fba2bab87d 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
+
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
+static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table ipc_table;
+
+ memcpy(&ipc_table, table, sizeof(ipc_table));
+ ipc_table.data = get_ipc(table);
+
+ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+}
+
+static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+ int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (err < 0)
+ return err;
+ if (ns->shm_rmid_forced)
+ shm_destroy_orphaned(ns);
+ return err;
+}
+
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
+#define proc_ipc_dointvec_minmax NULL
+#define proc_ipc_dointvec_minmax_orphans NULL
#define proc_ipc_callback_dointvec NULL
#define proc_ipcauto_dointvec_minmax NULL
#endif
@@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = {
.proc_handler = proc_ipc_dointvec,
},
{
+ .procname = "shm_rmid_forced",
+ .data = &init_ipc_ns.shm_rmid_forced,
+ .maxlen = sizeof(init_ipc_ns.shm_rmid_forced),
+ .mode = 0644,
+ .proc_handler = proc_ipc_dointvec_minmax_orphans,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 14fb6d67e6a3..ed049ea568f4 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -113,72 +113,75 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
{
struct user_struct *u = current_user();
struct inode *inode;
+ int ret = -ENOMEM;
inode = new_inode(sb);
- if (inode) {
- inode->i_ino = get_next_ino();
- inode->i_mode = mode;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_mtime = inode->i_ctime = inode->i_atime =
- CURRENT_TIME;
+ if (!inode)
+ goto err;
- if (S_ISREG(mode)) {
- struct mqueue_inode_info *info;
- struct task_struct *p = current;
- unsigned long mq_bytes, mq_msg_tblsz;
-
- inode->i_fop = &mqueue_file_operations;
- inode->i_size = FILENT_SIZE;
- /* mqueue specific info */
- info = MQUEUE_I(inode);
- spin_lock_init(&info->lock);
- init_waitqueue_head(&info->wait_q);
- INIT_LIST_HEAD(&info->e_wait_q[0].list);
- INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->notify_owner = NULL;
- info->qsize = 0;
- info->user = NULL; /* set when all is ok */
- memset(&info->attr, 0, sizeof(info->attr));
- info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
- info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
- if (attr) {
- info->attr.mq_maxmsg = attr->mq_maxmsg;
- info->attr.mq_msgsize = attr->mq_msgsize;
- }
- mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
- info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
- if (!info->messages)
- goto out_inode;
-
- mq_bytes = (mq_msg_tblsz +
- (info->attr.mq_maxmsg * info->attr.mq_msgsize));
-
- spin_lock(&mq_lock);
- if (u->mq_bytes + mq_bytes < u->mq_bytes ||
- u->mq_bytes + mq_bytes >
- task_rlimit(p, RLIMIT_MSGQUEUE)) {
- spin_unlock(&mq_lock);
- /* mqueue_evict_inode() releases info->messages */
- goto out_inode;
- }
- u->mq_bytes += mq_bytes;
- spin_unlock(&mq_lock);
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+
+ if (S_ISREG(mode)) {
+ struct mqueue_inode_info *info;
+ struct task_struct *p = current;
+ unsigned long mq_bytes, mq_msg_tblsz;
+
+ inode->i_fop = &mqueue_file_operations;
+ inode->i_size = FILENT_SIZE;
+ /* mqueue specific info */
+ info = MQUEUE_I(inode);
+ spin_lock_init(&info->lock);
+ init_waitqueue_head(&info->wait_q);
+ INIT_LIST_HEAD(&info->e_wait_q[0].list);
+ INIT_LIST_HEAD(&info->e_wait_q[1].list);
+ info->notify_owner = NULL;
+ info->qsize = 0;
+ info->user = NULL; /* set when all is ok */
+ memset(&info->attr, 0, sizeof(info->attr));
+ info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
+ info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
+ if (attr) {
+ info->attr.mq_maxmsg = attr->mq_maxmsg;
+ info->attr.mq_msgsize = attr->mq_msgsize;
+ }
+ mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+ if (!info->messages)
+ goto out_inode;
- /* all is ok */
- info->user = get_uid(u);
- } else if (S_ISDIR(mode)) {
- inc_nlink(inode);
- /* Some things misbehave if size == 0 on a directory */
- inode->i_size = 2 * DIRENT_SIZE;
- inode->i_op = &mqueue_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ mq_bytes = (mq_msg_tblsz +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+ spin_lock(&mq_lock);
+ if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+ u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) {
+ spin_unlock(&mq_lock);
+ /* mqueue_evict_inode() releases info->messages */
+ ret = -EMFILE;
+ goto out_inode;
}
+ u->mq_bytes += mq_bytes;
+ spin_unlock(&mq_lock);
+
+ /* all is ok */
+ info->user = get_uid(u);
+ } else if (S_ISDIR(mode)) {
+ inc_nlink(inode);
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * DIRENT_SIZE;
+ inode->i_op = &mqueue_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
}
+
return inode;
out_inode:
iput(inode);
- return NULL;
+err:
+ return ERR_PTR(ret);
}
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
@@ -194,8 +197,8 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
NULL);
- if (!inode) {
- error = -ENOMEM;
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
goto out;
}
@@ -315,8 +318,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
spin_unlock(&mq_lock);
inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
- if (!inode) {
- error = -ENOMEM;
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
spin_lock(&mq_lock);
ipc_ns->mq_queues_count--;
goto out_unlock;
diff --git a/ipc/shm.c b/ipc/shm.c
index 27884adb1a90..02ecf2c078fc 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
ns->shm_ctlmax = SHMMAX;
ns->shm_ctlall = SHMALL;
ns->shm_ctlmni = SHMMNI;
+ ns->shm_rmid_forced = 0;
ns->shm_tot = 0;
ipc_init_ids(&shm_ids(ns));
}
@@ -104,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns)
}
#endif
-void __init shm_init (void)
+static int __init ipc_ns_init(void)
{
shm_init_ns(&init_ipc_ns);
+ return 0;
+}
+
+pure_initcall(ipc_ns_init);
+
+void __init shm_init (void)
+{
ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
@@ -130,6 +138,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
return container_of(ipcp, struct shmid_kernel, shm_perm);
}
+static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
+{
+ rcu_read_lock();
+ spin_lock(&ipcp->shm_perm.lock);
+}
+
static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
int id)
{
@@ -187,6 +201,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
}
/*
+ * shm_may_destroy - identifies whether shm segment should be destroyed now
+ *
+ * Returns true if and only if there are no active users of the segment and
+ * one of the following is true:
+ *
+ * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
+ *
+ * 2) sysctl kernel.shm_rmid_forced is set to 1.
+ */
+static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+ return (shp->shm_nattch == 0) &&
+ (ns->shm_rmid_forced ||
+ (shp->shm_perm.mode & SHM_DEST));
+}
+
+/*
* remove the attach descriptor vma.
* free memory for segment if it is marked destroyed.
* The descriptor has already been removed from the current->mm->mmap list
@@ -206,14 +237,90 @@ static void shm_close(struct vm_area_struct *vma)
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
shp->shm_nattch--;
- if(shp->shm_nattch == 0 &&
- shp->shm_perm.mode & SHM_DEST)
+ if (shm_may_destroy(ns, shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
up_write(&shm_ids(ns).rw_mutex);
}
+/* Called with ns->shm_ids(ns).rw_mutex locked */
+static int shm_try_destroy_current(int id, void *p, void *data)
+{
+ struct ipc_namespace *ns = data;
+ struct kern_ipc_perm *ipcp = p;
+ struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
+ if (shp->shm_creator != current)
+ return 0;
+
+ /*
+ * Mark it as orphaned to destroy the segment when
+ * kernel.shm_rmid_forced is changed.
+ * It is noop if the following shm_may_destroy() returns true.
+ */
+ shp->shm_creator = NULL;
+
+ /*
+ * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
+ * is not set, it shouldn't be deleted here.
+ */
+ if (!ns->shm_rmid_forced)
+ return 0;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ return 0;
+}
+
+/* Called with ns->shm_ids(ns).rw_mutex locked */
+static int shm_try_destroy_orphaned(int id, void *p, void *data)
+{
+ struct ipc_namespace *ns = data;
+ struct kern_ipc_perm *ipcp = p;
+ struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
+ /*
+ * We want to destroy segments without users and with already
+ * exit'ed originating process.
+ *
+ * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+ */
+ if (shp->shm_creator != NULL)
+ return 0;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ return 0;
+}
+
+void shm_destroy_orphaned(struct ipc_namespace *ns)
+{
+ down_write(&shm_ids(ns).rw_mutex);
+ if (shm_ids(ns).in_use)
+ idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
+ up_write(&shm_ids(ns).rw_mutex);
+}
+
+
+void exit_shm(struct task_struct *task)
+{
+ struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+
+ if (shm_ids(ns).in_use == 0)
+ return;
+
+ /* Destroy all already created segments, but not mapped yet */
+ down_write(&shm_ids(ns).rw_mutex);
+ if (shm_ids(ns).in_use)
+ idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+ up_write(&shm_ids(ns).rw_mutex);
+}
+
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct file *file = vma->vm_file;
@@ -404,6 +511,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_segsz = size;
shp->shm_nattch = 0;
shp->shm_file = file;
+ shp->shm_creator = current;
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
* proc-ps tools use this. Changing this will break them.
@@ -950,8 +1058,7 @@ out_nattch:
shp = shm_lock(ns, shmid);
BUG_ON(IS_ERR(shp));
shp->shm_nattch--;
- if(shp->shm_nattch == 0 &&
- shp->shm_perm.mode & SHM_DEST)
+ if (shm_may_destroy(ns, shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);