summaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-11-12 10:20:12 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-12 10:20:12 +0900
commit39cf275a1a18ba3c7eb9b986c5c9b35b57332798 (patch)
tree40b119ca9d2fbaf8128d3fa25f4c64669002b0c0 /mm/huge_memory.c
parentad5d69899e52792671c1aa6c7360464c7edfe09c (diff)
parente5137b50a0640009fd63a3e65c14bc6e1be8796a (diff)
downloadlinux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.tar.gz
linux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.tar.bz2
linux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.zip
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar: "The main changes in this cycle are: - (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik van Riel, Peter Zijlstra et al. Yay! - optimize preemption counter handling: merge the NEED_RESCHED flag into the preempt_count variable, by Peter Zijlstra. - wait.h fixes and code reorganization from Peter Zijlstra - cfs_bandwidth fixes from Ben Segall - SMP load-balancer cleanups from Peter Zijstra - idle balancer improvements from Jason Low - other fixes and cleanups" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits) ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED stop_machine: Fix race between stop_two_cpus() and stop_cpus() sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus sched: Fix asymmetric scheduling for POWER7 sched: Move completion code from core.c to completion.c sched: Move wait code from core.c to wait.c sched: Move wait.c into kernel/sched/ sched/wait: Fix __wait_event_interruptible_lock_irq_timeout() sched: Avoid throttle_cfs_rq() racing with period_timer stopping sched: Guarantee new group-entities always have weight sched: Fix hrtimer_cancel()/rq->lock deadlock sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining sched: Fix race on toggling cfs_bandwidth_used sched: Remove extra put_online_cpus() inside sched_setaffinity() sched/rt: Fix task_tick_rt() comment sched/wait: Fix build breakage sched/wait: Introduce prepare_to_wait_event() sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too sched: Remove get_online_cpus() usage sched: Fix race in migrate_swap_stop() ...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c55
1 files changed, 44 insertions, 11 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cca80d96e509..2612f60f53ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1282,19 +1282,32 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page;
unsigned long haddr = addr & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id();
- int target_nid;
+ int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
+ int flags = 0;
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
page = pmd_page(pmd);
+ BUG_ON(is_huge_zero_page(page));
page_nid = page_to_nid(page);
+ last_cpupid = page_cpupid_last(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
- if (page_nid == this_nid)
+ if (page_nid == this_nid) {
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+ flags |= TNF_FAULT_LOCAL;
+ }
+
+ /*
+ * Avoid grouping on DSO/COW pages in specific and RO pages
+ * in general, RO pages shouldn't hurt as much anyway since
+ * they can be in shared cache state.
+ */
+ if (!pmd_write(pmd))
+ flags |= TNF_NO_GROUP;
/*
* Acquire the page lock to serialise THP migrations but avoid dropping
@@ -1325,7 +1338,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
lock_page(page);
anon_vma = page_lock_anon_vma_read(page);
- /* Confirm the PTE did not while locked */
+ /* Confirm the PMD did not change while page_table_lock was released */
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp))) {
unlock_page(page);
@@ -1341,8 +1354,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
spin_unlock(&mm->page_table_lock);
migrated = migrate_misplaced_transhuge_page(mm, vma,
pmdp, pmd, addr, page, target_nid);
- if (migrated)
+ if (migrated) {
+ flags |= TNF_MIGRATED;
page_nid = target_nid;
+ }
goto out;
clear_pmdnuma:
@@ -1360,7 +1375,7 @@ out:
page_unlock_anon_vma_read(anon_vma);
if (page_nid != -1)
- task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+ task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
return 0;
}
@@ -1458,6 +1473,12 @@ out:
return ret;
}
+/*
+ * Returns
+ * - 0 if PMD could not be locked
+ * - 1 if PMD was locked but protections unchange and TLB flush unnecessary
+ * - HPAGE_PMD_NR is protections changed and TLB flush necessary
+ */
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot, int prot_numa)
{
@@ -1466,22 +1487,34 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
pmd_t entry;
- entry = pmdp_get_and_clear(mm, addr, pmd);
+ ret = 1;
if (!prot_numa) {
+ entry = pmdp_get_and_clear(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
+ ret = HPAGE_PMD_NR;
BUG_ON(pmd_write(entry));
} else {
struct page *page = pmd_page(*pmd);
- /* only check non-shared pages */
- if (page_mapcount(page) == 1 &&
+ /*
+ * Do not trap faults against the zero page. The
+ * read-only data is likely to be read-cached on the
+ * local CPU cache and it is less useful to know about
+ * local vs remote hits on the zero page.
+ */
+ if (!is_huge_zero_page(page) &&
!pmd_numa(*pmd)) {
+ entry = pmdp_get_and_clear(mm, addr, pmd);
entry = pmd_mknuma(entry);
+ ret = HPAGE_PMD_NR;
}
}
- set_pmd_at(mm, addr, pmd, entry);
+
+ /* Set PMD if cleared earlier */
+ if (ret == HPAGE_PMD_NR)
+ set_pmd_at(mm, addr, pmd, entry);
+
spin_unlock(&vma->vm_mm->page_table_lock);
- ret = 1;
}
return ret;
@@ -1662,7 +1695,7 @@ static void __split_huge_page_refcount(struct page *page,
page_tail->mapping = page->mapping;
page_tail->index = page->index + i;
- page_nid_xchg_last(page_tail, page_nid_last(page));
+ page_cpupid_xchg_last(page_tail, page_cpupid_last(page));
BUG_ON(!PageAnon(page_tail));
BUG_ON(!PageUptodate(page_tail));