diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 31 | ||||
-rw-r--r-- | mm/hugetlb.c | 57 | ||||
-rw-r--r-- | mm/memory.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 13 |
4 files changed, 75 insertions, 30 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index c1330cc19783..a58699b6579e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, */ static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal) + unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx; + unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; void *ret; if(!size) { @@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } BUG_ON(align & (align-1)); - eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + if (limit && bdata->node_boot_start >= limit) + return NULL; + + limit >>=PAGE_SHIFT; + if (limit && end_pfn > limit) + end_pfn = limit; + + eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); offset = 0; if (align && (bdata->node_boot_start & (align - 1UL)) != 0) @@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, * first, then we try to allocate lower pages. */ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> PAGE_SHIFT) < end_pfn)) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) - preferred = bdata->last_success; + if (!limit || (limit && limit > bdata->last_success)) + preferred = bdata->last_success; } else preferred = 0; @@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit) { pg_data_t *pgdat = pgdat_list; void *ptr; for_each_pgdat(pgdat) if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal))) + align, goal, limit))) return(ptr); /* @@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned return NULL; } -void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) + +void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal); + ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit); if (ptr) return (ptr); - return __alloc_bootmem(size, align, goal); + return __alloc_bootmem_limit(size, align, goal, limit); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 901ac523a1c3..61d380678030 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, { pte_t *src_pte, *dst_pte, entry; struct page *ptepage; - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; + unsigned long addr; - while (addr < end) { + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { dst_pte = huge_pte_alloc(dst, addr); if (!dst_pte) goto nomem; + spin_lock(&src->page_table_lock); src_pte = huge_pte_offset(src, addr); - BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */ - entry = *src_pte; - ptepage = pte_page(entry); - get_page(ptepage); - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); - set_huge_pte_at(dst, addr, dst_pte, entry); - addr += HPAGE_SIZE; + if (src_pte && !pte_none(*src_pte)) { + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + set_huge_pte_at(dst, addr, dst_pte, entry); + } + spin_unlock(&src->page_table_lock); } return 0; @@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, page = pte_page(pte); put_page(page); + add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE)); } - add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); flush_tlb_range(vma, start, end); } @@ -393,6 +394,28 @@ out: return ret; } +/* + * On ia64 at least, it is possible to receive a hugetlb fault from a + * stale zero entry left in the TLB from earlier hardware prefetching. + * Low-level arch code should already have flushed the stale entry as + * part of its fault handling, but we do need to accept this minor fault + * and return successfully. Whereas the "normal" case is that this is + * an access to a hugetlb page which has been truncated off since mmap. + */ +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + int ret = VM_FAULT_SIGBUS; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pte = huge_pte_offset(mm, address); + if (pte && !pte_none(*pte)) + ret = VM_FAULT_MINOR; + spin_unlock(&mm->page_table_lock); + return ret; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i) @@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, BUG_ON(!is_vm_hugetlb_page(vma)); vpfn = vaddr/PAGE_SIZE; + spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { if (pages) { @@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * indexing below to work. */ pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); + /* the hugetlb file might have been truncated */ + if (!pte || pte_none(*pte)) { + remainder = 0; + if (!i) + i = -EFAULT; + break; + } page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; @@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, --remainder; ++i; } - + spin_unlock(&mm->page_table_lock); *length = remainder; *position = vaddr; diff --git a/mm/memory.c b/mm/memory.c index ae8161f1f459..1db40e935e55 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (is_vm_hugetlb_page(vma)) - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); /* * We need the page table lock to synchronize with kswapd diff --git a/mm/vmscan.c b/mm/vmscan.c index 0ea71e887bb6..64f9570cff56 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * PageDirty _after_ making sure that the page is freeable and * not in use by anybody. (pagecache + us == 2) */ - if (page_count(page) != 2 || PageDirty(page)) { - write_unlock_irq(&mapping->tree_lock); - goto keep_locked; - } + if (unlikely(page_count(page) != 2)) + goto cannot_free; + smp_rmb(); + if (unlikely(PageDirty(page))) + goto cannot_free; #ifdef CONFIG_SWAP if (PageSwapCache(page)) { @@ -538,6 +539,10 @@ free_it: __pagevec_release_nonlru(&freed_pvec); continue; +cannot_free: + write_unlock_irq(&mapping->tree_lock); + goto keep_locked; + activate_locked: SetPageActive(page); pgactivate++; |