diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/ipc_sysctl.c | 14 | ||||
-rw-r--r-- | ipc/mqueue.c | 82 | ||||
-rw-r--r-- | ipc/msgutil.c | 6 | ||||
-rw-r--r-- | ipc/util.c | 49 | ||||
-rw-r--r-- | ipc/util.h | 47 |
5 files changed, 141 insertions, 57 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 49f9bf4ffc7f..bfaae457810c 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -120,7 +120,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, static int zero; static int one = 1; static int int_max = INT_MAX; -static int ipc_mni = IPCMNI; +int ipc_mni = IPCMNI; +int ipc_mni_shift = IPCMNI_SHIFT; +int ipc_min_cycle = RADIX_TREE_MAP_SIZE; static struct ctl_table ipc_kern_table[] = { { @@ -246,3 +248,13 @@ static int __init ipc_sysctl_init(void) } device_initcall(ipc_sysctl_init); + +static int __init ipc_mni_extend(char *str) +{ + ipc_mni = IPCMNI_EXTEND; + ipc_mni_shift = IPCMNI_EXTEND_SHIFT; + ipc_min_cycle = IPCMNI_EXTEND_MIN_CYCLE; + pr_info("IPCMNI extended to %d.\n", ipc_mni); + return 0; +} +early_param("ipcmni_extend", ipc_mni_extend); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index aea30530c472..216cad1ff0d0 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -76,6 +76,7 @@ struct mqueue_inode_info { wait_queue_head_t wait_q; struct rb_root msg_tree; + struct rb_node *msg_tree_rightmost; struct posix_msg_tree_node *node_cache; struct mq_attr attr; @@ -131,6 +132,7 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) { struct rb_node **p, *parent = NULL; struct posix_msg_tree_node *leaf; + bool rightmost = true; p = &info->msg_tree.rb_node; while (*p) { @@ -139,9 +141,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) if (likely(leaf->priority == msg->m_type)) goto insert_msg; - else if (msg->m_type < leaf->priority) + else if (msg->m_type < leaf->priority) { p = &(*p)->rb_left; - else + rightmost = false; + } else p = &(*p)->rb_right; } if (info->node_cache) { @@ -154,6 +157,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) INIT_LIST_HEAD(&leaf->msg_list); } leaf->priority = msg->m_type; + + if (rightmost) + info->msg_tree_rightmost = &leaf->rb_node; + rb_link_node(&leaf->rb_node, parent, p); rb_insert_color(&leaf->rb_node, &info->msg_tree); insert_msg: @@ -163,23 +170,35 @@ insert_msg: return 0; } +static inline void msg_tree_erase(struct posix_msg_tree_node *leaf, + struct mqueue_inode_info *info) +{ + struct rb_node *node = &leaf->rb_node; + + if (info->msg_tree_rightmost == node) + info->msg_tree_rightmost = rb_prev(node); + + rb_erase(node, &info->msg_tree); + if (info->node_cache) { + kfree(leaf); + } else { + info->node_cache = leaf; + } +} + static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) { - struct rb_node **p, *parent = NULL; + struct rb_node *parent = NULL; struct posix_msg_tree_node *leaf; struct msg_msg *msg; try_again: - p = &info->msg_tree.rb_node; - while (*p) { - parent = *p; - /* - * During insert, low priorities go to the left and high to the - * right. On receive, we want the highest priorities first, so - * walk all the way to the right. - */ - p = &(*p)->rb_right; - } + /* + * During insert, low priorities go to the left and high to the + * right. On receive, we want the highest priorities first, so + * walk all the way to the right. + */ + parent = info->msg_tree_rightmost; if (!parent) { if (info->attr.mq_curmsgs) { pr_warn_once("Inconsistency in POSIX message queue, " @@ -194,24 +213,14 @@ try_again: pr_warn_once("Inconsistency in POSIX message queue, " "empty leaf node but we haven't implemented " "lazy leaf delete!\n"); - rb_erase(&leaf->rb_node, &info->msg_tree); - if (info->node_cache) { - kfree(leaf); - } else { - info->node_cache = leaf; - } + msg_tree_erase(leaf, info); goto try_again; } else { msg = list_first_entry(&leaf->msg_list, struct msg_msg, m_list); list_del(&msg->m_list); if (list_empty(&leaf->msg_list)) { - rb_erase(&leaf->rb_node, &info->msg_tree); - if (info->node_cache) { - kfree(leaf); - } else { - info->node_cache = leaf; - } + msg_tree_erase(leaf, info); } } info->attr.mq_curmsgs--; @@ -254,6 +263,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->qsize = 0; info->user = NULL; /* set when all is ok */ info->msg_tree = RB_ROOT; + info->msg_tree_rightmost = NULL; info->node_cache = NULL; memset(&info->attr, 0, sizeof(info->attr)); info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, @@ -419,24 +429,19 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void mqueue_i_callback(struct rcu_head *head) +static void mqueue_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } -static void mqueue_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, mqueue_i_callback); -} - static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; struct user_struct *user; unsigned long mq_bytes, mq_treesize; struct ipc_namespace *ipc_ns; - struct msg_msg *msg; + struct msg_msg *msg, *nmsg; + LIST_HEAD(tmp_msg); clear_inode(inode); @@ -447,10 +452,15 @@ static void mqueue_evict_inode(struct inode *inode) info = MQUEUE_I(inode); spin_lock(&info->lock); while ((msg = msg_get(info)) != NULL) - free_msg(msg); + list_add_tail(&msg->m_list, &tmp_msg); kfree(info->node_cache); spin_unlock(&info->lock); + list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) { + list_del(&msg->m_list); + free_msg(msg); + } + /* Total amount of bytes accounted for the mqueue */ mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * @@ -611,8 +621,6 @@ static void wq_add(struct mqueue_inode_info *info, int sr, { struct ext_wait_queue *walk; - ewp->task = current; - list_for_each_entry(walk, &info->e_wait_q[sr].list, list) { if (walk->task->prio <= current->prio) { list_add_tail(&ewp->list, &walk->list); @@ -1562,7 +1570,7 @@ static const struct file_operations mqueue_file_operations = { static const struct super_operations mqueue_super_ops = { .alloc_inode = mqueue_alloc_inode, - .destroy_inode = mqueue_destroy_inode, + .free_inode = mqueue_free_inode, .evict_inode = mqueue_evict_inode, .statfs = simple_statfs, }; diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 84598025a6ad..e65593742e2b 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -18,6 +18,7 @@ #include <linux/utsname.h> #include <linux/proc_ns.h> #include <linux/uaccess.h> +#include <linux/sched.h> #include "util.h" @@ -64,6 +65,9 @@ static struct msg_msg *alloc_msg(size_t len) pseg = &msg->next; while (len > 0) { struct msg_msgseg *seg; + + cond_resched(); + alen = min(len, DATALEN_SEG); seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT); if (seg == NULL) @@ -176,6 +180,8 @@ void free_msg(struct msg_msg *msg) kfree(msg); while (seg != NULL) { struct msg_msgseg *tmp = seg->next; + + cond_resched(); kfree(seg); seg = tmp; } diff --git a/ipc/util.c b/ipc/util.c index 0af05752969f..d126d156efc6 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -101,7 +101,6 @@ static const struct rhashtable_params ipc_kht_params = { .head_offset = offsetof(struct kern_ipc_perm, khtnode), .key_offset = offsetof(struct kern_ipc_perm, key), .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key), - .locks_mul = 1, .automatic_shrinking = true, }; @@ -110,7 +109,7 @@ static const struct rhashtable_params ipc_kht_params = { * @ids: ipc identifier set * * Set up the sequence range to use for the ipc identifier range (limited - * below IPCMNI) then initialise the keys hashtable and ids idr. + * below ipc_mni) then initialise the keys hashtable and ids idr. */ void ipc_init_ids(struct ipc_ids *ids) { @@ -120,6 +119,7 @@ void ipc_init_ids(struct ipc_ids *ids) rhashtable_init(&ids->key_ht, &ipc_kht_params); idr_init(&ids->ipcs_idr); ids->max_idx = -1; + ids->last_idx = -1; #ifdef CONFIG_CHECKPOINT_RESTORE ids->next_id = -1; #endif @@ -193,6 +193,10 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) * * The caller must own kern_ipc_perm.lock.of the new object. * On error, the function returns a (negative) error code. + * + * To conserve sequence number space, especially with extended ipc_mni, + * the sequence number is incremented only when the returned ID is less than + * the last one. */ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new) { @@ -216,17 +220,42 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new) */ if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */ - new->seq = ids->seq++; - if (ids->seq > IPCID_SEQ_MAX) - ids->seq = 0; - idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT); + int max_idx; + + max_idx = max(ids->in_use*3/2, ipc_min_cycle); + max_idx = min(max_idx, ipc_mni); + + /* allocate the idx, with a NULL struct kern_ipc_perm */ + idx = idr_alloc_cyclic(&ids->ipcs_idr, NULL, 0, max_idx, + GFP_NOWAIT); + + if (idx >= 0) { + /* + * idx got allocated successfully. + * Now calculate the sequence number and set the + * pointer for real. + */ + if (idx <= ids->last_idx) { + ids->seq++; + if (ids->seq >= ipcid_seq_max()) + ids->seq = 0; + } + ids->last_idx = idx; + + new->seq = ids->seq; + /* no need for smp_wmb(), this is done + * inside idr_replace, as part of + * rcu_assign_pointer + */ + idr_replace(&ids->ipcs_idr, new, idx); + } } else { new->seq = ipcid_to_seqx(next_id); idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id), 0, GFP_NOWAIT); } if (idx >= 0) - new->id = SEQ_MULTIPLIER * new->seq + idx; + new->id = (new->seq << ipcmni_seq_shift()) + idx; return idx; } @@ -254,8 +283,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit) /* 1) Initialize the refcount so that ipc_rcu_putref works */ refcount_set(&new->refcount, 1); - if (limit > IPCMNI) - limit = IPCMNI; + if (limit > ipc_mni) + limit = ipc_mni; if (ids->in_use >= limit) return -ENOSPC; @@ -738,7 +767,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, if (total >= ids->in_use) return NULL; - for (; pos < IPCMNI; pos++) { + for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { *new_pos = pos + 1; diff --git a/ipc/util.h b/ipc/util.h index e272be622ae7..0fcf8e719b76 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -15,8 +15,37 @@ #include <linux/err.h> #include <linux/ipc_namespace.h> -#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ -#define SEQ_MULTIPLIER (IPCMNI) +/* + * The IPC ID contains 2 separate numbers - index and sequence number. + * By default, + * bits 0-14: index (32k, 15 bits) + * bits 15-30: sequence number (64k, 16 bits) + * + * When IPCMNI extension mode is turned on, the composition changes: + * bits 0-23: index (16M, 24 bits) + * bits 24-30: sequence number (128, 7 bits) + */ +#define IPCMNI_SHIFT 15 +#define IPCMNI_EXTEND_SHIFT 24 +#define IPCMNI_EXTEND_MIN_CYCLE (RADIX_TREE_MAP_SIZE * RADIX_TREE_MAP_SIZE) +#define IPCMNI (1 << IPCMNI_SHIFT) +#define IPCMNI_EXTEND (1 << IPCMNI_EXTEND_SHIFT) + +#ifdef CONFIG_SYSVIPC_SYSCTL +extern int ipc_mni; +extern int ipc_mni_shift; +extern int ipc_min_cycle; + +#define ipcmni_seq_shift() ipc_mni_shift +#define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1) + +#else /* CONFIG_SYSVIPC_SYSCTL */ + +#define ipc_mni IPCMNI +#define ipc_min_cycle ((int)RADIX_TREE_MAP_SIZE) +#define ipcmni_seq_shift() IPCMNI_SHIFT +#define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1) +#endif /* CONFIG_SYSVIPC_SYSCTL */ void sem_init(void); void msg_init(void); @@ -96,9 +125,9 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *); #define IPC_MSG_IDS 1 #define IPC_SHM_IDS 2 -#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) -#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) -#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX) +#define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK) +#define ipcid_to_seqx(id) ((id) >> ipcmni_seq_shift()) +#define ipcid_seq_max() (INT_MAX >> ipcmni_seq_shift()) /* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); @@ -123,8 +152,8 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids) if (ids->in_use == 0) return -1; - if (ids->in_use == IPCMNI) - return IPCMNI - 1; + if (ids->in_use == ipc_mni) + return ipc_mni - 1; return ids->max_idx; } @@ -216,10 +245,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, static inline int sem_check_semmni(struct ipc_namespace *ns) { /* - * Check semmni range [0, IPCMNI] + * Check semmni range [0, ipc_mni] * semmni is the last element of sem_ctls[4] array */ - return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > IPCMNI)) + return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > ipc_mni)) ? -ERANGE : 0; } |