summaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c171
1 files changed, 97 insertions, 74 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b659b07ecd5..a8e17ba13790 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -311,11 +311,9 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* so memcg accounting is performed manually on assigning/releasing
* stacks to tasks. Drop __GFP_ACCOUNT.
*/
- stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
THREADINFO_GFP & ~__GFP_ACCOUNT,
- PAGE_KERNEL,
- 0, node, __builtin_return_address(0));
+ node, __builtin_return_address(0));
if (!stack)
return -ENOMEM;
@@ -436,35 +434,6 @@ static struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-#ifdef CONFIG_PER_VMA_LOCK
-
-/* SLAB cache for vm_area_struct.lock */
-static struct kmem_cache *vma_lock_cachep;
-
-static bool vma_lock_alloc(struct vm_area_struct *vma)
-{
- vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
- if (!vma->vm_lock)
- return false;
-
- init_rwsem(&vma->vm_lock->lock);
- vma->vm_lock_seq = UINT_MAX;
-
- return true;
-}
-
-static inline void vma_lock_free(struct vm_area_struct *vma)
-{
- kmem_cache_free(vma_lock_cachep, vma->vm_lock);
-}
-
-#else /* CONFIG_PER_VMA_LOCK */
-
-static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
-static inline void vma_lock_free(struct vm_area_struct *vma) {}
-
-#endif /* CONFIG_PER_VMA_LOCK */
-
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -474,14 +443,46 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
return NULL;
vma_init(vma, mm);
- if (!vma_lock_alloc(vma)) {
- kmem_cache_free(vm_area_cachep, vma);
- return NULL;
- }
return vma;
}
+static void vm_area_init_from(const struct vm_area_struct *src,
+ struct vm_area_struct *dest)
+{
+ dest->vm_mm = src->vm_mm;
+ dest->vm_ops = src->vm_ops;
+ dest->vm_start = src->vm_start;
+ dest->vm_end = src->vm_end;
+ dest->anon_vma = src->anon_vma;
+ dest->vm_pgoff = src->vm_pgoff;
+ dest->vm_file = src->vm_file;
+ dest->vm_private_data = src->vm_private_data;
+ vm_flags_init(dest, src->vm_flags);
+ memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+ sizeof(dest->vm_page_prot));
+ /*
+ * src->shared.rb may be modified concurrently when called from
+ * dup_mmap(), but the clone will reinitialize it.
+ */
+ data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+ memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
+ sizeof(dest->vm_userfaultfd_ctx));
+#ifdef CONFIG_ANON_VMA_NAME
+ dest->anon_name = src->anon_name;
+#endif
+#ifdef CONFIG_SWAP
+ memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
+ sizeof(dest->swap_readahead_info));
+#endif
+#ifndef CONFIG_MMU
+ dest->vm_region = src->vm_region;
+#endif
+#ifdef CONFIG_NUMA
+ dest->vm_policy = src->vm_policy;
+#endif
+}
+
struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
@@ -491,15 +492,8 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
- /*
- * orig->shared.rb may be modified concurrently, but the clone
- * will be reinitialized.
- */
- data_race(memcpy(new, orig, sizeof(*new)));
- if (!vma_lock_alloc(new)) {
- kmem_cache_free(vm_area_cachep, new);
- return NULL;
- }
+ vm_area_init_from(orig, new);
+ vma_lock_init(new, true);
INIT_LIST_HEAD(&new->anon_vma_chain);
vma_numab_state_init(new);
dup_anon_vma_name(orig, new);
@@ -511,35 +505,15 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
return new;
}
-void __vm_area_free(struct vm_area_struct *vma)
+void vm_area_free(struct vm_area_struct *vma)
{
+ /* The vma should be detached while being destroyed. */
+ vma_assert_detached(vma);
vma_numab_state_free(vma);
free_anon_vma_name(vma);
- vma_lock_free(vma);
kmem_cache_free(vm_area_cachep, vma);
}
-#ifdef CONFIG_PER_VMA_LOCK
-static void vm_area_free_rcu_cb(struct rcu_head *head)
-{
- struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
- vm_rcu);
-
- /* The vma should not be locked while being destroyed. */
- VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
- __vm_area_free(vma);
-}
-#endif
-
-void vm_area_free(struct vm_area_struct *vma)
-{
-#ifdef CONFIG_PER_VMA_LOCK
- call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
-#else
- __vm_area_free(vma);
-#endif
-}
-
static void account_kernel_stack(struct task_struct *tsk, int account)
{
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@ -830,6 +804,36 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
+#ifdef CONFIG_MM_ID
+static DEFINE_IDA(mm_ida);
+
+static inline int mm_alloc_id(struct mm_struct *mm)
+{
+ int ret;
+
+ ret = ida_alloc_range(&mm_ida, MM_ID_MIN, MM_ID_MAX, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ mm->mm_id = ret;
+ return 0;
+}
+
+static inline void mm_free_id(struct mm_struct *mm)
+{
+ const mm_id_t id = mm->mm_id;
+
+ mm->mm_id = MM_ID_DUMMY;
+ if (id == MM_ID_DUMMY)
+ return;
+ if (WARN_ON_ONCE(id < MM_ID_MIN || id > MM_ID_MAX))
+ return;
+ ida_free(&mm_ida, id);
+}
+#else /* !CONFIG_MM_ID */
+static inline int mm_alloc_id(struct mm_struct *mm) { return 0; }
+static inline void mm_free_id(struct mm_struct *mm) {}
+#endif /* CONFIG_MM_ID */
+
static void check_mm(struct mm_struct *mm)
{
int i;
@@ -933,6 +937,7 @@ void __mmdrop(struct mm_struct *mm)
WARN_ON_ONCE(mm == current->active_mm);
mm_free_pgd(mm);
+ mm_free_id(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
@@ -1267,6 +1272,15 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
#endif
}
+static void mmap_init_lock(struct mm_struct *mm)
+{
+ init_rwsem(&mm->mmap_lock);
+ mm_lock_seqcount_init(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+ rcuwait_init(&mm->vma_writer_wait);
+#endif
+}
+
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
@@ -1308,6 +1322,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (mm_alloc_pgd(mm))
goto fail_nopgd;
+ if (mm_alloc_id(mm))
+ goto fail_noid;
+
if (init_new_context(p, mm))
goto fail_nocontext;
@@ -1327,6 +1344,8 @@ fail_pcpu:
fail_cid:
destroy_context(mm);
fail_nocontext:
+ mm_free_id(mm);
+fail_noid:
mm_free_pgd(mm);
fail_nopgd:
free_mm(mm);
@@ -3183,6 +3202,11 @@ void __init mm_cache_init(void)
void __init proc_caches_init(void)
{
+ struct kmem_cache_args args = {
+ .use_freeptr_offset = true,
+ .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+ };
+
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3199,11 +3223,10 @@ void __init proc_caches_init(void)
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
-
- vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
-#ifdef CONFIG_PER_VMA_LOCK
- vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
-#endif
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), &args,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+ SLAB_ACCOUNT);
mmap_init();
nsproxy_cache_init();
}