author    | Nicholas Piggin <npiggin@gmail.com>     | 2018-06-01 20:01:20 +1000
committer | Michael Ellerman <mpe@ellerman.id.au>   | 2018-06-03 20:40:36 +1000
commit    | 85bcfaf69cbd610fdfac3351cf385809a2f4a93b (patch)
tree      | 0c3ec307217742cf1857462744ca588ecca006e9 /arch/powerpc/mm
parent    | f1cb8f9beba8699dd1b4518418191499e53f7b17 (diff)
powerpc/64s/radix: optimise pte_update
Implementing pte_update() with pte_xchg() (which uses cmpxchg) is
inefficient: a single larx/stcx. sequence does the job, so there is
no need for the heavier cmpxchg loop.
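For reference, the pte_update() rework itself lands in the book3s/64 radix headers rather than in arch/powerpc/mm, so it does not appear in the diffstat below. A minimal sketch of the barrier-free ldarx/stdcx. loop the message describes, using the hypothetical name pte_update_sketch() and assuming the PTE is a plain unsigned long, might look like:

/*
 * Sketch only: clear the bits in 'clr', set the bits in 'set', and return
 * the old PTE value, using a single load-reserve/store-conditional pair
 * with no memory barriers.
 */
static inline unsigned long pte_update_sketch(unsigned long *ptep,
					      unsigned long clr,
					      unsigned long set)
{
	unsigned long old_pte, new_pte;

	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3		# load-reserve the old PTE\n"
	"	andc	%1,%0,%5	# new = old & ~clr\n"
	"	or	%1,%1,%4	# new |= set\n"
	"	stdcx.	%1,0,%3		# store-conditional the new PTE\n"
	"	bne-	1b		# reservation lost, retry\n"
	: "=&r" (old_pte), "=&r" (new_pte), "=m" (*ptep)
	: "r" (ptep), "r" (set), "r" (clr)
	: "cc");

	return old_pte;
}

A cmpxchg-based loop has to re-read the PTE, recompute the new value and retry in C whenever the compare fails; folding the and/or into the larx/stcx. sequence avoids the extra compare and the outer retry loop.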
Then remove the memory barriers from the operation itself. The ordering
requirement that remains is that TLB flushing must load mm_cpumask after
the store that reduces PTE permissions; the barrier that provides this
ordering is moved into the TLB flush code.
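The requirement being preserved is a store-buffering pattern that needs a full barrier on both sides: the flushing CPU must order its PTE-clearing store before its load of mm_cpumask, and a CPU switching to the mm must order its mm_cpumask update before any loads through the page tables. A small userspace model of that pairing (illustrative only; flusher_side() and switcher_side() are made-up names, not kernel functions) might look like:

#include <stdatomic.h>
#include <stdbool.h>

atomic_ulong pte = 1;       /* stands in for the PTE being cleared */
atomic_ulong mm_cpumask;    /* stands in for mm_cpumask(mm)        */

/* CPU clearing the PTE and flushing (the radix__flush_tlb_mm() side). */
bool flusher_side(void)
{
	atomic_store_explicit(&pte, 0, memory_order_relaxed);    /* clear the PTE     */
	atomic_thread_fence(memory_order_seq_cst);               /* the added smp_mb() */
	/* true: other CPU seen, global tlbie needed; false: local flush suffices */
	return atomic_load_explicit(&mm_cpumask, memory_order_relaxed) != 0;
}

/* CPU switching to the mm (the switch_mm_irqs_off() side). */
unsigned long switcher_side(void)
{
	atomic_store_explicit(&mm_cpumask, 1, memory_order_relaxed); /* set our bit        */
	atomic_thread_fence(memory_order_seq_cst);                   /* existing smp_mb()  */
	return atomic_load_explicit(&pte, memory_order_relaxed);     /* page-table walk    */
}

With both fences in place, the outcome "flusher_side() returns false while switcher_side() still reads the old PTE" is forbidden: either the flusher sees the other CPU in the cpumask and sends a global invalidate, or the switching CPU already observes the cleared PTE and cannot cache a stale translation.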
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/mmu_context.c |  6
-rw-r--r-- | arch/powerpc/mm/tlb-radix.c   | 11
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0ab297c4cfad..f84e14f23e50 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * in switch_slb(), and/or the store of paca->mm_ctx_id in
 	 * copy_mm_to_paca().
 	 *
-	 * On the read side the barrier is in pte_xchg(), which orders
-	 * the store to the PTE vs the load of mm_cpumask.
+	 * On the other side, the barrier is in mm/tlb-radix.c for
+	 * radix which orders earlier stores to clear the PTEs vs
+	 * the load of mm_cpumask. And pte_xchg which does the same
+	 * thing for hash.
 	 *
 	 * This full barrier is needed by membarrier when switching
 	 * between processes after store to rq->curr, before user-space
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 5ac3206c51cc..cdc50398fd60 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -524,6 +524,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
@@ -544,6 +549,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 	else
@@ -568,6 +574,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	else
@@ -630,6 +637,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -791,6 +799,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -849,7 +858,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {