diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:20:12 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:20:12 +0900 |
commit | 39cf275a1a18ba3c7eb9b986c5c9b35b57332798 (patch) | |
tree | 40b119ca9d2fbaf8128d3fa25f4c64669002b0c0 /mm/huge_memory.c | |
parent | ad5d69899e52792671c1aa6c7360464c7edfe09c (diff) | |
parent | e5137b50a0640009fd63a3e65c14bc6e1be8796a (diff) | |
download | linux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.tar.gz linux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.tar.bz2 linux-stable-39cf275a1a18ba3c7eb9b986c5c9b35b57332798.zip |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"The main changes in this cycle are:
- (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik
van Riel, Peter Zijlstra et al. Yay!
- optimize preemption counter handling: merge the NEED_RESCHED flag
into the preempt_count variable, by Peter Zijlstra.
- wait.h fixes and code reorganization from Peter Zijlstra
- cfs_bandwidth fixes from Ben Segall
- SMP load-balancer cleanups from Peter Zijstra
- idle balancer improvements from Jason Low
- other fixes and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits)
ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED
stop_machine: Fix race between stop_two_cpus() and stop_cpus()
sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus
sched: Fix asymmetric scheduling for POWER7
sched: Move completion code from core.c to completion.c
sched: Move wait code from core.c to wait.c
sched: Move wait.c into kernel/sched/
sched/wait: Fix __wait_event_interruptible_lock_irq_timeout()
sched: Avoid throttle_cfs_rq() racing with period_timer stopping
sched: Guarantee new group-entities always have weight
sched: Fix hrtimer_cancel()/rq->lock deadlock
sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining
sched: Fix race on toggling cfs_bandwidth_used
sched: Remove extra put_online_cpus() inside sched_setaffinity()
sched/rt: Fix task_tick_rt() comment
sched/wait: Fix build breakage
sched/wait: Introduce prepare_to_wait_event()
sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too
sched: Remove get_online_cpus() usage
sched: Fix race in migrate_swap_stop()
...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 55 |
1 files changed, 44 insertions, 11 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cca80d96e509..2612f60f53ee 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1282,19 +1282,32 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; int page_nid = -1, this_nid = numa_node_id(); - int target_nid; + int target_nid, last_cpupid = -1; bool page_locked; bool migrated = false; + int flags = 0; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); + BUG_ON(is_huge_zero_page(page)); page_nid = page_to_nid(page); + last_cpupid = page_cpupid_last(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (page_nid == this_nid) + if (page_nid == this_nid) { count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + flags |= TNF_FAULT_LOCAL; + } + + /* + * Avoid grouping on DSO/COW pages in specific and RO pages + * in general, RO pages shouldn't hurt as much anyway since + * they can be in shared cache state. + */ + if (!pmd_write(pmd)) + flags |= TNF_NO_GROUP; /* * Acquire the page lock to serialise THP migrations but avoid dropping @@ -1325,7 +1338,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); anon_vma = page_lock_anon_vma_read(page); - /* Confirm the PTE did not while locked */ + /* Confirm the PMD did not change while page_table_lock was released */ spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); @@ -1341,8 +1354,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); - if (migrated) + if (migrated) { + flags |= TNF_MIGRATED; page_nid = target_nid; + } goto out; clear_pmdnuma: @@ -1360,7 +1375,7 @@ out: page_unlock_anon_vma_read(anon_vma); if (page_nid != -1) - task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); + task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags); return 0; } @@ -1458,6 +1473,12 @@ out: return ret; } +/* + * Returns + * - 0 if PMD could not be locked + * - 1 if PMD was locked but protections unchange and TLB flush unnecessary + * - HPAGE_PMD_NR is protections changed and TLB flush necessary + */ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa) { @@ -1466,22 +1487,34 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma) == 1) { pmd_t entry; - entry = pmdp_get_and_clear(mm, addr, pmd); + ret = 1; if (!prot_numa) { + entry = pmdp_get_and_clear(mm, addr, pmd); entry = pmd_modify(entry, newprot); + ret = HPAGE_PMD_NR; BUG_ON(pmd_write(entry)); } else { struct page *page = pmd_page(*pmd); - /* only check non-shared pages */ - if (page_mapcount(page) == 1 && + /* + * Do not trap faults against the zero page. The + * read-only data is likely to be read-cached on the + * local CPU cache and it is less useful to know about + * local vs remote hits on the zero page. + */ + if (!is_huge_zero_page(page) && !pmd_numa(*pmd)) { + entry = pmdp_get_and_clear(mm, addr, pmd); entry = pmd_mknuma(entry); + ret = HPAGE_PMD_NR; } } - set_pmd_at(mm, addr, pmd, entry); + + /* Set PMD if cleared earlier */ + if (ret == HPAGE_PMD_NR) + set_pmd_at(mm, addr, pmd, entry); + spin_unlock(&vma->vm_mm->page_table_lock); - ret = 1; } return ret; @@ -1662,7 +1695,7 @@ static void __split_huge_page_refcount(struct page *page, page_tail->mapping = page->mapping; page_tail->index = page->index + i; - page_nid_xchg_last(page_tail, page_nid_last(page)); + page_cpupid_xchg_last(page_tail, page_cpupid_last(page)); BUG_ON(!PageAnon(page_tail)); BUG_ON(!PageUptodate(page_tail)); |