summaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
author: Peter Zijlstra <peterz@infradead.org>, 2020-11-11 13:43:57 +0100
committer: Peter Zijlstra <peterz@infradead.org>, 2020-12-03 10:14:51 +0100
commit: 8af26be062721e52eba1550caf50b712f774c5fd (patch)
tree: 3927b7054821e97bc397f3a34e64db1e19ee08b4 /kernel/events
parent: 560dabbdf68bb15f9e241af8f828b1c8c38d6c6f (diff)
downloadlinux-8af26be062721e52eba1550caf50b712f774c5fd.tar.gz
linux-8af26be062721e52eba1550caf50b712f774c5fd.tar.bz2
linux-8af26be062721e52eba1550caf50b712f774c5fd.zip
perf/core: Fix arch_perf_get_page_size()
The (new) page-table walker in arch_perf_get_page_size() is broken in various ways. Specifically, while it is used in a lockless manner, it neither depends on CONFIG_HAVE_FAST_GUP nor uses the proper _lockless offset methods, nor is it careful to read each entry only once.

Also, the hugetlb support is broken due to calling pte_page() without first checking pte_special().

Rewrite the whole thing to be a proper lockless page-table walker and employ the new pXX_leaf_size() pgtable functions to determine the pagetable size without looking at the page-frames.

Fixes: 51b646b2d9f8 ("perf,mm: Handle non-page-table-aligned hugetlbfs")
Fixes: 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20201126124207.GM3040@hirez.programming.kicks-ass.net
Diffstat (limited to 'kernel/events')
-rw-r--r-- kernel/events/core.c 103
1 files changed, 38 insertions, 65 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d2f3ca792936..a21b0be2f22c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -52,6 +52,7 @@
#include <linux/mount.h>
#include <linux/min_heap.h>
#include <linux/highmem.h>
+#include <linux/pgtable.h>
#include "internal.h"
@@ -7001,90 +7002,62 @@ static u64 perf_virt_to_phys(u64 virt)
return phys_addr;
}
-#ifdef CONFIG_MMU
-
/*
- * Return the MMU page size of a given virtual address.
- *
- * This generic implementation handles page-table aligned huge pages, as well
- * as non-page-table aligned hugetlbfs compound pages.
- *
- * If an architecture supports and uses non-page-table aligned pages in their
- * kernel mapping it will need to provide it's own implementation of this
- * function.
+ * Return the pagetable size of a given virtual address.
*/
-__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
{
- struct page *page;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ u64 size = 0;
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
- return 0;
+#ifdef CONFIG_HAVE_FAST_GUP
+ pgd_t *pgdp, pgd;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
- p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
return 0;
- if (p4d_leaf(*p4d))
- return 1ULL << P4D_SHIFT;
+ if (pgd_leaf(pgd))
+ return pgd_leaf_size(pgd);
- pud = pud_offset(p4d, addr);
- if (!pud_present(*pud))
+ p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (!p4d_present(p4d))
return 0;
- if (pud_leaf(*pud)) {
-#ifdef pud_page
- page = pud_page(*pud);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PUD_SHIFT;
- }
+ if (p4d_leaf(p4d))
+ return p4d_leaf_size(p4d);
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ pudp = pud_offset_lockless(p4dp, p4d, addr);
+ pud = READ_ONCE(*pudp);
+ if (!pud_present(pud))
return 0;
- if (pmd_leaf(*pmd)) {
-#ifdef pmd_page
- page = pmd_page(*pmd);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PMD_SHIFT;
- }
+ if (pud_leaf(pud))
+ return pud_leaf_size(pud);
- pte = pte_offset_map(pmd, addr);
- if (!pte_present(*pte)) {
- pte_unmap(pte);
+ pmdp = pmd_offset_lockless(pudp, pud, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (!pmd_present(pmd))
return 0;
- }
- page = pte_page(*pte);
- if (PageHuge(page)) {
- u64 size = page_size(compound_head(page));
- pte_unmap(pte);
- return size;
- }
+ if (pmd_leaf(pmd))
+ return pmd_leaf_size(pmd);
- pte_unmap(pte);
- return PAGE_SIZE;
-}
+ ptep = pte_offset_map(&pmd, addr);
+ pte = ptep_get_lockless(ptep);
+ if (pte_present(pte))
+ size = pte_leaf_size(pte);
+ pte_unmap(ptep);
+#endif /* CONFIG_HAVE_FAST_GUP */
-#else
-
-static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
+ return size;
}
-#endif
-
static u64 perf_get_page_size(unsigned long addr)
{
struct mm_struct *mm;
@@ -7109,7 +7082,7 @@ static u64 perf_get_page_size(unsigned long addr)
mm = &init_mm;
}
- size = arch_perf_get_page_size(mm, addr);
+ size = perf_get_pgtable_size(mm, addr);
local_irq_restore(flags);