Diffstat (limited to 'arch/x86/entry/vdso')
-rw-r--r--  arch/x86/entry/vdso/Makefile             |  39
-rwxr-xr-x  arch/x86/entry/vdso/checkundef.sh        |  10
-rw-r--r--  arch/x86/entry/vdso/extable.h            |   2
-rw-r--r--  arch/x86/entry/vdso/vdso-layout.lds.S    |  22
-rw-r--r--  arch/x86/entry/vdso/vdso.lds.S           |   2
-rw-r--r--  arch/x86/entry/vdso/vdso2c.c             |  21
-rw-r--r--  arch/x86/entry/vdso/vdso2c.h             |  20
-rw-r--r--  arch/x86/entry/vdso/vdso32-setup.c       |  16
-rw-r--r--  arch/x86/entry/vdso/vgetrandom-chacha.S  | 178
-rw-r--r--  arch/x86/entry/vdso/vgetrandom.c         |  15
-rw-r--r--  arch/x86/entry/vdso/vma.c                | 165
11 files changed, 266 insertions(+), 224 deletions(-)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 3d64bcc403cf..54d3e9774d62 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -4,47 +4,22 @@
#
# Include the generic Makefile to check the built vDSO:
-include $(srctree)/lib/vdso/Makefile
-
-# Sanitizer runtimes are unavailable and cannot be linked here.
-KASAN_SANITIZE := n
-KMSAN_SANITIZE_vclock_gettime.o := n
-KMSAN_SANITIZE_vdso32/vclock_gettime.o := n
-KMSAN_SANITIZE_vgetcpu.o := n
-KMSAN_SANITIZE_vdso32/vgetcpu.o := n
-
-UBSAN_SANITIZE := n
-KCSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD := y
-
-# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
-KCOV_INSTRUMENT := n
+include $(srctree)/lib/vdso/Makefile.include
# Files to link into the vDSO:
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o
vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
# Files to link into the kernel:
obj-y += vma.o extable.o
-KASAN_SANITIZE_vma.o := y
-UBSAN_SANITIZE_vma.o := y
-KCSAN_SANITIZE_vma.o := y
-
-OBJECT_FILES_NON_STANDARD_vma.o := n
-OBJECT_FILES_NON_STANDARD_extable.o := n
# vDSO images to build:
obj-$(CONFIG_X86_64) += vdso-image-64.o
obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o
obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o
-OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
-OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n
-OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
-OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
-
vobjs := $(addprefix $(obj)/, $(vobjs-y))
vobjs32 := $(addprefix $(obj)/, $(vobjs32-y))
@@ -57,7 +32,7 @@ targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg
CPPFLAGS_vdso.lds += -P -C
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
+VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 \
-z max-page-size=4096
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
@@ -98,6 +73,7 @@ CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg
CFLAGS_REMOVE_vsgx.o = -pg
+CFLAGS_REMOVE_vgetrandom.o = -pg
#
# X32 processes use x32 vDSO to access 64bit kernel data.
@@ -157,6 +133,7 @@ KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_32 += -DBUILD_VDSO
ifdef CONFIG_MITIGATION_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
@@ -175,12 +152,10 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
quiet_cmd_vdso = VDSO $@
cmd_vdso = $(LD) -o $@ \
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
- -T $(filter %.lds,$^) $(filter %.o,$^) && \
- sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+ -T $(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
+VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 --no-undefined \
$(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack
-GCOV_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
diff --git a/arch/x86/entry/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh
deleted file mode 100755
index 7ee90a9b549d..000000000000
--- a/arch/x86/entry/vdso/checkundef.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
- exit 0
-else
- echo "$file: undefined symbols found" >&2
- exit 1
-fi
diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h
index b56f6b012941..baba612b832c 100644
--- a/arch/x86/entry/vdso/extable.h
+++ b/arch/x86/entry/vdso/extable.h
@@ -7,7 +7,7 @@
* vDSO uses a dedicated handler the addresses are relative to the overall
* exception table, not each individual entry.
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \
ASM_VDSO_EXTABLE_HANDLE from to
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index bafa73f09e92..ec1ac191a057 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/vdso.h>
+#include <asm/vdso/vsyscall.h>
+#include <vdso/datapage.h>
/*
* Linker script for vDSO. This is an ELF shared object prelinked to
@@ -16,23 +18,11 @@ SECTIONS
* segment.
*/
- vvar_start = . - 4 * PAGE_SIZE;
- vvar_page = vvar_start;
+ VDSO_VVAR_SYMS
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#include <asm/vvar.h>
-#undef EMIT_VVAR
-
- pvclock_page = vvar_start + PAGE_SIZE;
- hvclock_page = vvar_start + 2 * PAGE_SIZE;
- timens_page = vvar_start + 3 * PAGE_SIZE;
-
-#undef _ASM_X86_VVAR_H
- /* Place all vvars in timens too at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
-#include <asm/vvar.h>
-#undef EMIT_VVAR
+ vclock_pages = VDSO_VCLOCK_PAGES_START(vdso_u_data);
+ pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE;
+ hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index e8c60ae7a7c8..0bab5f4af6d1 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -30,6 +30,8 @@ VERSION {
#ifdef CONFIG_X86_SGX
__vdso_sgx_enter_enclave;
#endif
+ getrandom;
+ __vdso_getrandom;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 90d15f2a7205..f84e8f8fa5fe 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -69,33 +69,12 @@
const char *outfilename;
-/* Symbols that we need in vdso2c. */
-enum {
- sym_vvar_start,
- sym_vvar_page,
- sym_pvclock_page,
- sym_hvclock_page,
- sym_timens_page,
-};
-
-const int special_pages[] = {
- sym_vvar_page,
- sym_pvclock_page,
- sym_hvclock_page,
- sym_timens_page,
-};
-
struct vdso_sym {
const char *name;
bool export;
};
struct vdso_sym required_syms[] = {
- [sym_vvar_start] = {"vvar_start", true},
- [sym_vvar_page] = {"vvar_page", true},
- [sym_pvclock_page] = {"pvclock_page", true},
- [sym_hvclock_page] = {"hvclock_page", true},
- [sym_timens_page] = {"timens_page", true},
{"VDSO32_NOTE_MASK", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 67b3e37576a6..78ed1c1f28b9 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -150,26 +150,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
}
- /* Validate mapping addresses. */
- for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
- INT_BITS symval = syms[special_pages[i]];
-
- if (!symval)
- continue; /* The mapping isn't used; ignore it. */
-
- if (symval % 4096)
- fail("%s must be a multiple of 4096\n",
- required_syms[i].name);
- if (symval + 4096 < syms[sym_vvar_start])
- fail("%s underruns vvar_start\n",
- required_syms[i].name);
- if (symval + 4096 > 0)
- fail("%s is on the wrong side of the vdso text\n",
- required_syms[i].name);
- }
- if (syms[sym_vvar_start] % 4096)
- fail("vvar_begin must be a multiple of 4096\n");
-
if (!image_name) {
fwrite(stripped_addr, stripped_len, 1, outfile);
return;
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 76e4e74f35b5..8894013eea1d 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -51,15 +51,17 @@ __setup("vdso32=", vdso32_setup);
__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
#endif
-#ifdef CONFIG_X86_64
#ifdef CONFIG_SYSCTL
-/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
-static struct ctl_table abi_table2[] = {
+static const struct ctl_table vdso_table[] = {
{
+#ifdef CONFIG_X86_64
.procname = "vsyscall32",
+#else
+ .procname = "vdso_enabled",
+#endif
.data = &vdso32_enabled,
.maxlen = sizeof(int),
.mode = 0644,
@@ -71,10 +73,14 @@ static struct ctl_table abi_table2[] = {
static __init int ia32_binfmt_init(void)
{
- register_sysctl("abi", abi_table2);
+#ifdef CONFIG_X86_64
+ /* Register vsyscall32 into the ABI table */
+ register_sysctl("abi", vdso_table);
+#else
+ register_sysctl_init("vm", vdso_table);
+#endif
return 0;
}
__initcall(ia32_binfmt_init);
#endif /* CONFIG_SYSCTL */
-#endif /* CONFIG_X86_64 */
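With the hunks above, the renamed table is registered on both bitnesses: 64-bit kernels keep the knob at /proc/sys/abi/vsyscall32, while native 32-bit kernels now expose it as /proc/sys/vm/vdso_enabled. A minimal userspace check of the 64-bit path, shown only for illustration (not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        /* 64-bit path; native 32-bit kernels use /proc/sys/vm/vdso_enabled. */
        FILE *f = fopen("/proc/sys/abi/vsyscall32", "r");
        int enabled;

        if (f && fscanf(f, "%d", &enabled) == 1)
            printf("32-bit vDSO enabled: %d\n", enabled);
        if (f)
            fclose(f);
        return 0;
    }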
diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..bcba5639b8ee
--- /dev/null
+++ b/arch/x86/entry/vdso/vgetrandom-chacha.S
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+.section .rodata, "a"
+.align 16
+CONSTANTS: .octa 0x6b20657479622d323320646e61707865
+.text
+
+/*
+ * Very basic SSE2 implementation of ChaCha20. Produces a given positive number
+ * of blocks of output with a nonce of 0, taking an input key and 8-byte
+ * counter. Importantly does not spill to the stack. Its arguments are:
+ *
+ * rdi: output bytes
+ * rsi: 32-byte key input
+ * rdx: 8-byte counter input/output
+ * rcx: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+.set output, %rdi
+.set key, %rsi
+.set counter, %rdx
+.set nblocks, %rcx
+.set i, %al
+/* xmm registers are *not* callee-save. */
+.set temp, %xmm0
+.set state0, %xmm1
+.set state1, %xmm2
+.set state2, %xmm3
+.set state3, %xmm4
+.set copy0, %xmm5
+.set copy1, %xmm6
+.set copy2, %xmm7
+.set copy3, %xmm8
+.set one, %xmm9
+
+ /* copy0 = "expand 32-byte k" */
+ movaps CONSTANTS(%rip),copy0
+ /* copy1,copy2 = key */
+ movups 0x00(key),copy1
+ movups 0x10(key),copy2
+ /* copy3 = counter || zero nonce */
+ movq 0x00(counter),copy3
+ /* one = 1 || 0 */
+ movq $1,%rax
+ movq %rax,one
+
+.Lblock:
+ /* state0,state1,state2,state3 = copy0,copy1,copy2,copy3 */
+ movdqa copy0,state0
+ movdqa copy1,state1
+ movdqa copy2,state2
+ movdqa copy3,state3
+
+ movb $10,i
+.Lpermute:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $16,temp
+ psrld $16,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $12,temp
+ psrld $20,state1
+ por temp,state1
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $8,temp
+ psrld $24,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $7,temp
+ psrld $25,state1
+ por temp,state1
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ pshufd $0x39,state1,state1
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ pshufd $0x4e,state2,state2
+ /* state3[0,1,2,3] = state3[3,0,1,2] */
+ pshufd $0x93,state3,state3
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $16,temp
+ psrld $16,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $12,temp
+ psrld $20,state1
+ por temp,state1
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $8,temp
+ psrld $24,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $7,temp
+ psrld $25,state1
+ por temp,state1
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ pshufd $0x93,state1,state1
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ pshufd $0x4e,state2,state2
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ pshufd $0x39,state3,state3
+
+ decb i
+ jnz .Lpermute
+
+ /* output0 = state0 + copy0 */
+ paddd copy0,state0
+ movups state0,0x00(output)
+ /* output1 = state1 + copy1 */
+ paddd copy1,state1
+ movups state1,0x10(output)
+ /* output2 = state2 + copy2 */
+ paddd copy2,state2
+ movups state2,0x20(output)
+ /* output3 = state3 + copy3 */
+ paddd copy3,state3
+ movups state3,0x30(output)
+
+ /* ++copy3.counter */
+ paddq one,copy3
+
+ /* output += 64, --nblocks */
+ addq $64,output
+ decq nblocks
+ jnz .Lblock
+
+ /* counter = copy3.counter */
+ movq copy3,0x00(counter)
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+ pxor state0,state0
+ pxor state1,state1
+ pxor state2,state2
+ pxor state3,state3
+ pxor copy1,copy1
+ pxor copy2,copy2
+ pxor temp,temp
+
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
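For readers less fluent in SSE2: each pslld/psrld/por triple above implements a 32-bit rotate on four lanes at once, and the .Lpermute loop runs ten column-round/diagonal-round pairs, i.e. the twenty rounds of ChaCha20, with the pshufd shuffles realigning the lanes between the column and diagonal halves. A scalar C sketch of the quarter-round the assembly vectorizes (illustrative only, with a = state0, b = state1, c = state2, d = state3):

    #include <stdint.h>

    /* What each pslld/psrld/por triple computes, per 32-bit lane. */
    static inline uint32_t rotl32(uint32_t v, int n)
    {
        return (v << n) | (v >> (32 - n));
    }

    /* One ChaCha20 quarter-round; the SSE2 code runs four of these in
     * parallel, one per lane of the state0..state3 registers. */
    static void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
        *a += *b; *d = rotl32(*d ^ *a, 16);
        *c += *d; *b = rotl32(*b ^ *c, 12);
        *a += *b; *d = rotl32(*d ^ *a, 8);
        *c += *d; *b = rotl32(*b ^ *c, 7);
    }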
diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vgetrandom.c
new file mode 100644
index 000000000000..430862b8977c
--- /dev/null
+++ b/arch/x86/entry/vdso/vgetrandom.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
+
+ssize_t getrandom(void *, size_t, unsigned int, void *, size_t)
+ __attribute__((weak, alias("__vdso_getrandom")));
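The exported function is only useful together with a per-thread opaque state whose allocation parameters the vDSO itself reports. The following userspace sketch follows the protocol in lib/vdso/getrandom.c; the symbol resolution (walking the vDSO ELF image found via getauxval(AT_SYSINFO_EHDR)) is elided, and the struct layout is reproduced from include/vdso/getrandom.h as an assumption:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <sys/types.h>

    /* Assumed mirror of the kernel's struct vgetrandom_opaque_params. */
    struct vgetrandom_opaque_params {
        uint32_t size_of_opaque_state;
        uint32_t mmap_prot;
        uint32_t mmap_flags;
        uint32_t reserved[13];
    };

    /* Assumed to point at __vdso_getrandom; resolution elided. */
    extern ssize_t (*vdso_getrandom)(void *buffer, size_t len, unsigned int flags,
                                     void *opaque_state, size_t opaque_len);

    static ssize_t fill_random(unsigned char *buf, size_t len)
    {
        struct vgetrandom_opaque_params params;
        void *state;
        ssize_t ret;

        /* opaque_len == ~0UL with a NULL buffer and zero len queries the
         * allocation parameters for the per-thread opaque state. */
        if (vdso_getrandom(NULL, 0, 0, &params, ~0UL) != 0)
            return -1;

        state = mmap(NULL, params.size_of_opaque_state, params.mmap_prot,
                     params.mmap_flags, -1, 0);
        if (state == MAP_FAILED)
            return -1;

        ret = vdso_getrandom(buf, len, 0, state, params.size_of_opaque_state);
        munmap(state, params.size_of_opaque_state);
        return ret;
    }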
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 6d83ceb7f1ba..adb299d3b6a1 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,29 +14,20 @@
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
-#include <linux/time_namespace.h>
+#include <linux/vdso_datastore.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
-#include <asm/vvar.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
+#include <asm/vdso/vsyscall.h>
#include <clocksource/hyperv_timer.h>
-#undef _ASM_X86_VVAR_H
-#define EMIT_VVAR(name, offset) \
- const size_t name ## _offset = offset;
-#include <asm/vvar.h>
-
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page + _vdso_data_offset);
-}
-#undef EMIT_VVAR
+static_assert(VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES);
unsigned int vclocks_used __read_mostly;
@@ -56,7 +47,6 @@ int __init init_vdso_image(const struct vdso_image *image)
return 0;
}
-static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
@@ -100,106 +90,32 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-#ifdef CONFIG_TIME_NS
-/*
- * The vvar page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, &vvar_mapping))
- zap_vma_pages(vma);
- }
- mmap_read_unlock(mm);
-
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
{
- const struct vdso_image *image = vma->vm_mm->context.vdso_image;
- unsigned long pfn;
- long sym_offset;
-
- if (!image)
- return VM_FAULT_SIGBUS;
-
- sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
- image->sym_vvar_start;
-
- /*
- * Sanity check: a symbol offset of zero means that the page
- * does not exist for this vdso image, not that the page is at
- * offset zero relative to the text mapping. This should be
- * impossible here, because sym_offset should only be zero for
- * the page past the end of the vvar mapping.
- */
- if (sym_offset == 0)
- return VM_FAULT_SIGBUS;
-
- if (sym_offset == image->sym_vvar_page) {
- struct page *timens_page = find_timens_vvar_page(vma);
-
- pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
-
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the sym_vvar_page offset and
- * the real VVAR page is mapped with the sym_timens_page
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (timens_page) {
- unsigned long addr;
- vm_fault_t err;
-
- /*
- * Optimization: inside time namespace pre-fault
- * VVAR page too. As on timens page there are only
- * offsets for clocks on VVAR, it'll be faulted
- * shortly by VDSO code.
- */
- addr = vmf->address + (image->sym_timens_page - sym_offset);
- err = vmf_insert_pfn(vma, addr, pfn);
- if (unlikely(err & VM_FAULT_ERROR))
- return err;
-
- pfn = page_to_pfn(timens_page);
- }
-
- return vmf_insert_pfn(vma, vmf->address, pfn);
- } else if (sym_offset == image->sym_pvclock_page) {
+ switch (vmf->pgoff) {
+#ifdef CONFIG_PARAVIRT_CLOCK
+ case VDSO_PAGE_PVCLOCK_OFFSET:
+ {
struct pvclock_vsyscall_time_info *pvti =
pvclock_get_pvti_cpu0_va();
- if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
+
+ if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
return vmf_insert_pfn_prot(vma, vmf->address,
__pa(pvti) >> PAGE_SHIFT,
pgprot_decrypted(vma->vm_page_prot));
- }
- } else if (sym_offset == image->sym_hvclock_page) {
- pfn = hv_get_tsc_pfn();
-
+ break;
+ }
+#endif /* CONFIG_PARAVIRT_CLOCK */
+#ifdef CONFIG_HYPERV_TIMER
+ case VDSO_PAGE_HVCLOCK_OFFSET:
+ {
+ unsigned long pfn = hv_get_tsc_pfn();
if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
return vmf_insert_pfn(vma, vmf->address, pfn);
- } else if (sym_offset == image->sym_timens_page) {
- struct page *timens_page = find_timens_vvar_page(vma);
-
- if (!timens_page)
- return VM_FAULT_SIGBUS;
-
- pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
- return vmf_insert_pfn(vma, vmf->address, pfn);
+ break;
+ }
+#endif /* CONFIG_HYPERV_TIMER */
}
return VM_FAULT_SIGBUS;
@@ -210,9 +126,9 @@ static const struct vm_special_mapping vdso_mapping = {
.fault = vdso_fault,
.mremap = vdso_mremap,
};
-static const struct vm_special_mapping vvar_mapping = {
- .name = "[vvar]",
- .fault = vvar_fault,
+static const struct vm_special_mapping vvar_vclock_mapping = {
+ .name = "[vvar_vclock]",
+ .fault = vvar_vclock_fault,
};
/*
@@ -231,13 +147,13 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
return -EINTR;
addr = get_unmapped_area(NULL, addr,
- image->size - image->sym_vvar_start, 0, 0);
+ image->size + __VDSO_PAGES * PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
- text_start = addr - image->sym_vvar_start;
+ text_start = addr + __VDSO_PAGES * PAGE_SIZE;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -246,7 +162,8 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
text_start,
image->size,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_SEALED_SYSMAP,
&vdso_mapping);
if (IS_ERR(vma)) {
@@ -254,21 +171,30 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
goto up_fail;
}
+ vma = vdso_install_vvar_mapping(mm, addr);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ do_munmap(mm, text_start, image->size, NULL);
+ goto up_fail;
+ }
+
vma = _install_special_mapping(mm,
- addr,
- -image->sym_vvar_start,
+ VDSO_VCLOCK_PAGES_START(addr),
+ VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
- VM_PFNMAP,
- &vvar_mapping);
+ VM_PFNMAP|VM_SEALED_SYSMAP,
+ &vvar_vclock_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size, NULL);
- } else {
- current->mm->context.vdso = (void __user *)text_start;
- current->mm->context.vdso_image = image;
+ do_munmap(mm, addr, image->size, NULL);
+ goto up_fail;
}
+ current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
+
up_fail:
mmap_write_unlock(mm);
return ret;
@@ -290,7 +216,8 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
*/
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_mapping) ||
- vma_is_special_mapping(vma, &vvar_mapping)) {
+ vma_is_special_mapping(vma, &vdso_vvar_mapping) ||
+ vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
mmap_write_unlock(mm);
return -EEXIST;
}
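Putting the map_vdso() hunks together: the reservation grows from image->size - sym_vvar_start to image->size + __VDSO_PAGES * PAGE_SIZE, the generic data pages are installed by vdso_install_vvar_mapping(), and only the vclock PFN pages keep an x86-specific fault handler. A sketch of the resulting address-space layout, inferred from the hunks above:

    /*
     * addr                                                  text_start
     * |                                                     |
     * +---------------------+---------------------------+------------------+
     * | generic vDSO data   | [vvar_vclock]             | [vdso] text      |
     * | (vdso_install_vvar_ | (pvclock_page and         | (image->size     |
     * |  mapping())         |  hvclock_page, PFN-mapped)|  bytes)          |
     * +---------------------+---------------------------+------------------+
     * |<-- VDSO_NR_PAGES -->|<- VDSO_NR_VCLOCK_PAGES -->|
     * |<------------- __VDSO_PAGES ------------------->|
     *
     * text_start = addr + __VDSO_PAGES * PAGE_SIZE; the vclock pages begin
     * at VDSO_VCLOCK_PAGES_START(addr), and the static_assert() added in
     * vma.c pins VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES.
     */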