author     Martin Schwidefsky <schwidefsky@de.ibm.com>  2016-03-22 10:54:24 +0100
committer  Martin Schwidefsky <schwidefsky@de.ibm.com>  2017-02-08 14:13:25 +0100
commit     57d7f939e7bdd746992f5c318a78697ba837c523 (patch)
tree       80c01c9c446fc190432798a6d1ecf0c8e5456b57 /arch/s390/mm
parent     2583b848cad049cf5f3f0a03af8b140668b376f3 (diff)
s390: add no-execute support
Bit 0x100 of a page table, segment table or region table entry
can be used to disallow code execution for the virtual addresses
associated with the entry.
There is one tricky bit: the system call to return from a signal
is part of the signal frame written to the user stack. With a
non-executable stack this would stop working. To avoid breaking
things the protection fault handler checks the opcode that caused
the fault for 0x0a77 (sys_sigreturn) and 0x0aad (sys_rt_sigreturn)
and injects a system call. This is preferable to the alternative
solution with a stub function in the vdso because it works for
vdso=off and statically linked binaries as well.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
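
A note on the two magic values: on s390 the "svc" instruction assembles to
the byte 0x0a followed by the immediate system call number, so 0x0a77 is
"svc 119" (sigreturn) and 0x0aad is "svc 173" (rt_sigreturn). A trivial
standalone C sketch of the decode the fault handler performs (not part of
the patch):

#include <stdio.h>

int main(void)
{
	/* the two instructions recognized by the protection fault handler */
	static const unsigned short insn[] = { 0x0a77, 0x0aad };
	int i;

	for (i = 0; i < 2; i++)
		printf("0x%04x -> opcode 0x%02x (svc), syscall %u\n",
		       insn[i], insn[i] >> 8, insn[i] & 0xff);
	return 0;
}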
Diffstat (limited to 'arch/s390/mm')
 -rw-r--r--  arch/s390/mm/dump_pagetables.c |  15
 -rw-r--r--  arch/s390/mm/fault.c           |  42
 -rw-r--r--  arch/s390/mm/hugetlbpage.c     |  10
 -rw-r--r--  arch/s390/mm/init.c            |   3
 -rw-r--r--  arch/s390/mm/pageattr.c        | 117
 -rw-r--r--  arch/s390/mm/pgtable.c         |   2
 -rw-r--r--  arch/s390/mm/vmem.c            |  42
 7 files changed, 146 insertions(+), 85 deletions(-)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 861880df12c7..5a46b1d7e578 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -49,8 +49,8 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
 		seq_printf(m, "I\n");
 		return;
 	}
-	seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
-	seq_putc(m, '\n');
+	seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+	seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
 }
 
 static void note_page(struct seq_file *m, struct pg_state *st,
@@ -117,7 +117,8 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
 	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
 		st->current_address = addr;
 		pte = pte_offset_kernel(pmd, addr);
-		prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
+		prot = pte_val(*pte) &
+			(_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
 		note_page(m, st, prot, 4);
 		addr += PAGE_SIZE;
 	}
@@ -135,7 +136,9 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 		pmd = pmd_offset(pud, addr);
 		if (!pmd_none(*pmd)) {
 			if (pmd_large(*pmd)) {
-				prot = pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT;
+				prot = pmd_val(*pmd) &
+					(_SEGMENT_ENTRY_PROTECT |
+					 _SEGMENT_ENTRY_NOEXEC);
 				note_page(m, st, prot, 3);
 			} else
 				walk_pte_level(m, st, pmd, addr);
@@ -157,7 +160,9 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 		pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud))
 			if (pud_large(*pud)) {
-				prot = pud_val(*pud) & _REGION_ENTRY_PROTECT;
+				prot = pud_val(*pud) &
+					(_REGION_ENTRY_PROTECT |
+					 _REGION_ENTRY_NOEXEC);
 				note_page(m, st, prot, 2);
 			} else
 				walk_pmd_level(m, st, pud, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d1faae5cdd12..bb5560eb2435 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -311,12 +311,34 @@ static noinline void do_sigbus(struct pt_regs *regs)
 	force_sig_info(SIGBUS, &si, tsk);
 }
 
-static noinline void do_fault_error(struct pt_regs *regs, int fault)
+static noinline int signal_return(struct pt_regs *regs)
+{
+	u16 instruction;
+	int rc;
+
+	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
+	if (rc)
+		return rc;
+	if (instruction == 0x0a77) {
+		set_pt_regs_flag(regs, PIF_SYSCALL);
+		regs->int_code = 0x00040077;
+		return 0;
+	} else if (instruction == 0x0aad) {
+		set_pt_regs_flag(regs, PIF_SYSCALL);
+		regs->int_code = 0x000400ad;
+		return 0;
+	}
+	return -EACCES;
+}
+
+static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
 {
 	int si_code;
 
 	switch (fault) {
 	case VM_FAULT_BADACCESS:
+		if (access == VM_EXEC && signal_return(regs) == 0)
+			break;
 	case VM_FAULT_BADMAP:
 		/* Bad memory access. Check if it is kernel or user space. */
 		if (user_mode(regs)) {
@@ -324,7 +346,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
 			si_code = (fault == VM_FAULT_BADMAP) ?
 				SEGV_MAPERR : SEGV_ACCERR;
 			do_sigsegv(regs, si_code);
-			return;
+			break;
 		}
 	case VM_FAULT_BADCONTEXT:
 	case VM_FAULT_PFAULT:
@@ -525,7 +547,7 @@ out:
 void do_protection_exception(struct pt_regs *regs)
 {
 	unsigned long trans_exc_code;
-	int fault;
+	int access, fault;
 
 	trans_exc_code = regs->int_parm_long;
 	/*
@@ -544,9 +566,17 @@ void do_protection_exception(struct pt_regs *regs)
 		do_low_address(regs);
 		return;
 	}
-	fault = do_exception(regs, VM_WRITE);
+	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
+		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
+				      (regs->psw.addr & PAGE_MASK);
+		access = VM_EXEC;
+		fault = VM_FAULT_BADACCESS;
+	} else {
+		access = VM_WRITE;
+		fault = do_exception(regs, access);
+	}
 	if (unlikely(fault))
-		do_fault_error(regs, fault);
+		do_fault_error(regs, access, fault);
 }
 NOKPROBE_SYMBOL(do_protection_exception);
 
@@ -557,7 +587,7 @@ void do_dat_exception(struct pt_regs *regs)
 	access = VM_READ | VM_EXEC | VM_WRITE;
 	fault = do_exception(regs, access);
 	if (unlikely(fault))
-		do_fault_error(regs, fault);
+		do_fault_error(regs, access, fault);
 }
 NOKPROBE_SYMBOL(do_dat_exception);
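
What the fault.c change above means for user space, sketched below: with NX
enforced, jumping into a writable mapping now faults with si_code
SEGV_ACCERR (the VM_FAULT_BADACCESS path) instead of executing. A minimal
user-space demonstration, independent of the patch; the bytes 0x07 0xfe are
s390 "br %r14", so on machines without the facility the call simply returns:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
	/* expect SEGV_ACCERR: the mapping exists but execute is denied */
	printf("SIGSEGV, si_code=%d (SEGV_ACCERR=%d)\n",
	       si->si_code, SEGV_ACCERR);
	exit(0);
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler,
				.sa_flags = SA_SIGINFO };
	unsigned char *buf;
	void (*fn)(void);

	sigaction(SIGSEGV, &sa, NULL);
	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	buf[0] = 0x07;	/* br %r14: return immediately if executed */
	buf[1] = 0xfe;
	fn = (void (*)(void))buf;
	fn();
	puts("executed a writable page (no NX on this machine)");
	return 0;
}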
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 4a0c5bce3552..a03816227719 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -59,6 +59,8 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
 				     _SEGMENT_ENTRY_SOFT_DIRTY);
 #endif
+		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
+				     _SEGMENT_ENTRY_NOEXEC);
 	} else
 		rste = _SEGMENT_ENTRY_INVALID;
 	return rste;
@@ -113,6 +115,8 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
 					     _PAGE_DIRTY);
 #endif
+		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
+					     _PAGE_NOEXEC);
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
@@ -121,7 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
-	unsigned long rste = __pte_to_rste(pte);
+	unsigned long rste;
+
+	rste = __pte_to_rste(pte);
+	if (!MACHINE_HAS_NX)
+		rste &= ~_SEGMENT_ENTRY_NOEXEC;
 	/* Set correct table type for 2G hugepages */
 	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
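
The hugetlbpage.c conversion above funnels the new bit through
move_set_bit(), which is defined elsewhere in that file and not visible in
this diff. Its assumed semantics, for illustration only: if the "from" bit
is set in x, yield the "to" bit, else 0.

static inline unsigned long move_set_bit(unsigned long x,
					 unsigned long from,
					 unsigned long to)
{
	return (x & from) ? to : 0;
}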
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index b67454ad8408..ba0c8d18e10d 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -137,6 +137,9 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
+	__set_memory((unsigned long) _sinittext,
+		     (_einittext - _sinittext) >> PAGE_SHIFT,
+		     SET_MEMORY_RW | SET_MEMORY_NX);
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 44f150312a16..a1543b74ee00 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -81,24 +81,24 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
 	}
 }
 
-struct cpa {
-	unsigned int set_ro : 1;
-	unsigned int clear_ro : 1;
-};
-
 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
-			  struct cpa cpa)
+			  unsigned long flags)
 {
 	pte_t *ptep, new;
 
 	ptep = pte_offset(pmdp, addr);
 	do {
-		if (pte_none(*ptep))
+		new = *ptep;
+		if (pte_none(new))
 			return -EINVAL;
-		if (cpa.set_ro)
-			new = pte_wrprotect(*ptep);
-		else if (cpa.clear_ro)
-			new = pte_mkwrite(pte_mkdirty(*ptep));
+		if (flags & SET_MEMORY_RO)
+			new = pte_wrprotect(new);
+		else if (flags & SET_MEMORY_RW)
+			new = pte_mkwrite(pte_mkdirty(new));
+		if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+			pte_val(new) |= _PAGE_NOEXEC;
+		else if (flags & SET_MEMORY_X)
+			pte_val(new) &= ~_PAGE_NOEXEC;
 		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
 		ptep++;
 		addr += PAGE_SIZE;
@@ -112,14 +112,17 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
 	unsigned long pte_addr, prot;
 	pte_t *pt_dir, *ptep;
 	pmd_t new;
-	int i, ro;
+	int i, ro, nx;
 
 	pt_dir = vmem_pte_alloc();
 	if (!pt_dir)
 		return -ENOMEM;
 	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
 	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
 	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+	if (!nx)
+		prot &= ~_PAGE_NOEXEC;
 	ptep = pt_dir;
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		pte_val(*ptep) = pte_addr | prot;
@@ -133,19 +136,24 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
 	return 0;
 }
 
-static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa)
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
+			    unsigned long flags)
 {
-	pmd_t new;
-
-	if (cpa.set_ro)
-		new = pmd_wrprotect(*pmdp);
-	else if (cpa.clear_ro)
-		new = pmd_mkwrite(pmd_mkdirty(*pmdp));
+	pmd_t new = *pmdp;
+
+	if (flags & SET_MEMORY_RO)
+		new = pmd_wrprotect(new);
+	else if (flags & SET_MEMORY_RW)
+		new = pmd_mkwrite(pmd_mkdirty(new));
+	if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+		pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
+	else if (flags & SET_MEMORY_X)
+		pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
 	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
 }
 
 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
-			  struct cpa cpa)
+			  unsigned long flags)
 {
 	unsigned long next;
 	pmd_t *pmdp;
@@ -163,9 +171,9 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
 					return rc;
 				continue;
 			}
-			modify_pmd_page(pmdp, addr, cpa);
+			modify_pmd_page(pmdp, addr, flags);
 		} else {
-			rc = walk_pte_level(pmdp, addr, next, cpa);
+			rc = walk_pte_level(pmdp, addr, next, flags);
 			if (rc)
 				return rc;
 		}
@@ -181,14 +189,17 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
 	unsigned long pmd_addr, prot;
 	pmd_t *pm_dir, *pmdp;
 	pud_t new;
-	int i, ro;
+	int i, ro, nx;
 
 	pm_dir = vmem_pmd_alloc();
 	if (!pm_dir)
 		return -ENOMEM;
 	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
 	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
 	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+	if (!nx)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
 	pmdp = pm_dir;
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd_val(*pmdp) = pmd_addr | prot;
@@ -202,19 +213,24 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
 	return 0;
 }
 
-static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa)
+static void modify_pud_page(pud_t *pudp, unsigned long addr,
+			    unsigned long flags)
 {
-	pud_t new;
-
-	if (cpa.set_ro)
-		new = pud_wrprotect(*pudp);
-	else if (cpa.clear_ro)
-		new = pud_mkwrite(pud_mkdirty(*pudp));
+	pud_t new = *pudp;
+
+	if (flags & SET_MEMORY_RO)
+		new = pud_wrprotect(new);
+	else if (flags & SET_MEMORY_RW)
+		new = pud_mkwrite(pud_mkdirty(new));
+	if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+		pud_val(new) |= _REGION_ENTRY_NOEXEC;
+	else if (flags & SET_MEMORY_X)
+		pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
 }
 
 static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
-			  struct cpa cpa)
+			  unsigned long flags)
 {
 	unsigned long next;
 	pud_t *pudp;
@@ -232,9 +248,9 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
 					break;
 				continue;
 			}
-			modify_pud_page(pudp, addr, cpa);
+			modify_pud_page(pudp, addr, flags);
 		} else {
-			rc = walk_pmd_level(pudp, addr, next, cpa);
+			rc = walk_pmd_level(pudp, addr, next, flags);
 		}
 		pudp++;
 		addr = next;
@@ -246,7 +262,7 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
 static DEFINE_MUTEX(cpa_mutex);
 
 static int change_page_attr(unsigned long addr, unsigned long end,
-			    struct cpa cpa)
+			    unsigned long flags)
 {
 	unsigned long next;
 	int rc = -EINVAL;
@@ -262,7 +278,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 		if (pgd_none(*pgdp))
 			break;
 		next = pgd_addr_end(addr, end);
-		rc = walk_pud_level(pgdp, addr, next, cpa);
+		rc = walk_pud_level(pgdp, addr, next, flags);
 		if (rc)
 			break;
 		cond_resched();
@@ -271,35 +287,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 	return rc;
 }
 
-int set_memory_ro(unsigned long addr, int numpages)
+int __set_memory(unsigned long addr, int numpages, unsigned long flags)
 {
-	struct cpa cpa = {
-		.set_ro = 1,
-	};
-
 	addr &= PAGE_MASK;
-	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
-}
-
-int set_memory_rw(unsigned long addr, int numpages)
-{
-	struct cpa cpa = {
-		.clear_ro = 1,
-	};
-
-	addr &= PAGE_MASK;
-	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
-}
-
-/* not possible */
-int set_memory_nx(unsigned long addr, int numpages)
-{
-	return 0;
-}
-
-int set_memory_x(unsigned long addr, int numpages)
-{
-	return 0;
+	return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -339,7 +330,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 		nr = min(numpages - i, nr);
 		if (enable) {
 			for (j = 0; j < nr; j++) {
-				pte_val(*pte) = address | pgprot_val(PAGE_KERNEL);
+				pte_val(*pte) &= ~_PAGE_INVALID;
 				address += PAGE_SIZE;
 				pte++;
 			}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 7a1897c51c54..190d0c65904a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -274,6 +274,8 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 {
 	pgste_t pgste;
 
+	if (!MACHINE_HAS_NX)
+		pte_val(pte) &= ~_PAGE_NOEXEC;
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get(ptep);
 		pgste_set_key(ptep, pgste, pte, mm);
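
With change_page_attr() taking a flag word, the pageattr.c rework above
collapses the four old helpers into a single __set_memory() entry point,
and set_memory_nx()/set_memory_x() stop being "not possible" stubs. The
SET_MEMORY_* flags and the thin wrappers live in a header outside the
arch/s390/mm diffstat shown here; a plausible sketch, with the flag values
being assumptions:

#define SET_MEMORY_RO	1UL
#define SET_MEMORY_RW	2UL
#define SET_MEMORY_NX	4UL
#define SET_MEMORY_X	8UL

int __set_memory(unsigned long addr, int numpages, unsigned long flags);

static inline int set_memory_ro(unsigned long addr, int numpages)
{
	return __set_memory(addr, numpages, SET_MEMORY_RO);
}

static inline int set_memory_nx(unsigned long addr, int numpages)
{
	return __set_memory(addr, numpages, SET_MEMORY_NX);
}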
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 45becc8a44ec..253046344b3c 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -79,6 +79,7 @@ pte_t __ref *vmem_pte_alloc(void)
  */
 static int vmem_add_mem(unsigned long start, unsigned long size)
 {
+	unsigned long pgt_prot, sgt_prot, r3_prot;
 	unsigned long pages4k, pages1m, pages2g;
 	unsigned long end = start + size;
 	unsigned long address = start;
@@ -88,6 +89,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 	pte_t *pt_dir;
 	int ret = -ENOMEM;
 
+	pgt_prot = pgprot_val(PAGE_KERNEL);
+	sgt_prot = pgprot_val(SEGMENT_KERNEL);
+	r3_prot = pgprot_val(REGION3_KERNEL);
+	if (!MACHINE_HAS_NX) {
+		pgt_prot &= ~_PAGE_NOEXEC;
+		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+		r3_prot &= ~_REGION_ENTRY_NOEXEC;
+	}
 	pages4k = pages1m = pages2g = 0;
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
@@ -101,7 +110,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
 		     !debug_pagealloc_enabled()) {
-			pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL);
+			pud_val(*pu_dir) = address | r3_prot;
 			address += PUD_SIZE;
 			pages2g++;
 			continue;
@@ -116,7 +125,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
 		    !debug_pagealloc_enabled()) {
-			pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL);
+			pmd_val(*pm_dir) = address | sgt_prot;
 			address += PMD_SIZE;
 			pages1m++;
 			continue;
@@ -129,7 +138,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL);
+		pte_val(*pt_dir) = address | pgt_prot;
 		address += PAGE_SIZE;
 		pages4k++;
 	}
@@ -200,6 +209,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
  */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
+	unsigned long pgt_prot, sgt_prot;
 	unsigned long address = start;
 	pgd_t *pg_dir;
 	pud_t *pu_dir;
@@ -207,6 +217,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	pte_t *pt_dir;
 	int ret = -ENOMEM;
 
+	pgt_prot = pgprot_val(PAGE_KERNEL);
+	sgt_prot = pgprot_val(SEGMENT_KERNEL);
+	if (!MACHINE_HAS_NX) {
+		pgt_prot &= ~_PAGE_NOEXEC;
+		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+	}
 	for (address = start; address < end;) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
@@ -238,8 +254,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 				new_page = vmemmap_alloc_block(PMD_SIZE, node);
 				if (!new_page)
 					goto out;
-				pmd_val(*pm_dir) = __pa(new_page) |
-					_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
+				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
 				address = (address + PMD_SIZE) & PMD_MASK;
 				continue;
 			}
@@ -259,8 +274,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
 			if (!new_page)
 				goto out;
-			pte_val(*pt_dir) =
-				__pa(new_page) | pgprot_val(PAGE_KERNEL);
+			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
 		}
 		address += PAGE_SIZE;
 	}
@@ -372,13 +386,21 @@ out:
  */
 void __init vmem_map_init(void)
 {
-	unsigned long size = _eshared - _stext;
 	struct memblock_region *reg;
 
 	for_each_memblock(memory, reg)
 		vmem_add_mem(reg->base, reg->size);
-	set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT);
-	pr_info("Write protected kernel read-only data: %luk\n", size >> 10);
+	__set_memory((unsigned long) _stext,
+		     (_etext - _stext) >> PAGE_SHIFT,
+		     SET_MEMORY_RO | SET_MEMORY_X);
+	__set_memory((unsigned long) _etext,
+		     (_eshared - _etext) >> PAGE_SHIFT,
+		     SET_MEMORY_RO);
+	__set_memory((unsigned long) _sinittext,
+		     (_einittext - _sinittext) >> PAGE_SHIFT,
+		     SET_MEMORY_RO | SET_MEMORY_X);
+	pr_info("Write protected kernel read-only data: %luk\n",
+		(_eshared - _stext) >> 10);
 }
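
The diffstat is limited to arch/s390/mm, so the definition of
MACHINE_HAS_NX used throughout the patch is not shown here. A hedged
sketch of how such a machine flag is typically wired up in s390 early
setup code; the facility number (130, instruction-execution protection)
and the MACHINE_FLAG_NX name are assumptions not confirmed by this page:

/* hypothetical sketch, not part of the diff above */
static noinline __init void setup_facility_nx(void)
{
	if (test_facility(130))	/* assumed IEP facility bit */
		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
}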