Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c   31
-rw-r--r--  mm/hugetlb.c   57
-rw-r--r--  mm/memory.c     4
-rw-r--r--  mm/vmscan.c    13
4 files changed, 75 insertions, 30 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index c1330cc19783..a58699b6579e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
*/
static void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
- unsigned long align, unsigned long goal)
+ unsigned long align, unsigned long goal, unsigned long limit)
{
unsigned long offset, remaining_size, areasize, preferred;
- unsigned long i, start = 0, incr, eidx;
+ unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
void *ret;
if(!size) {
@@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
}
BUG_ON(align & (align-1));
- eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ if (limit && bdata->node_boot_start >= limit)
+ return NULL;
+
+ limit >>= PAGE_SHIFT;
+ if (limit && end_pfn > limit)
+ end_pfn = limit;
+
+ eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
offset = 0;
if (align &&
(bdata->node_boot_start & (align - 1UL)) != 0)
@@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
* first, then we try to allocate lower pages.
*/
if (goal && (goal >= bdata->node_boot_start) &&
- ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+ ((goal >> PAGE_SHIFT) < end_pfn)) {
preferred = goal - bdata->node_boot_start;
if (bdata->last_success >= preferred)
- preferred = bdata->last_success;
+ if (!limit || (limit && limit > bdata->last_success))
+ preferred = bdata->last_success;
} else
preferred = 0;
@@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void)
return(free_all_bootmem_core(NODE_DATA(0)));
}
-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
+ unsigned long limit)
{
pg_data_t *pgdat = pgdat_list;
void *ptr;
for_each_pgdat(pgdat)
if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
- align, goal)))
+ align, goal, limit)))
return(ptr);
/*
@@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned
return NULL;
}
-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
+
+void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
+ unsigned long goal, unsigned long limit)
{
void *ptr;
- ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+ ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
if (ptr)
return (ptr);
- return __alloc_bootmem(size, align, goal);
+ return __alloc_bootmem_limit(size, align, goal, limit);
}
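
For context, here is a minimal sketch of how the new limit argument is meant to be used. The caller, the sizes and the 16 MiB bound below are hypothetical; only __alloc_bootmem_limit() itself comes from the diff above, and its prototype is assumed to be exported via linux/bootmem.h. limit is an exclusive upper bound on the physical address of the returned block, and passing 0 keeps the old unbounded behaviour:

    #include <linux/bootmem.h>	/* assumed home of the new prototypes */
    #include <linux/init.h>

    /*
     * Hypothetical early-boot caller: a page-aligned 64 KiB buffer that
     * must sit below 16 MiB of physical memory.  goal = 0 means "no
     * preferred start address"; limit = 0 would drop the bound again.
     */
    static void * __init alloc_buffer_below_16m(void)
    {
            return __alloc_bootmem_limit(64 * 1024, PAGE_SIZE,
                                         0, 16 * 1024 * 1024);
    }

On NUMA, __alloc_bootmem_node_limit() tries the requested node first and then falls back to the all-node __alloc_bootmem_limit() loop, exactly as the hunk above shows.
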
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 901ac523a1c3..61d380678030 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
{
pte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
- unsigned long addr = vma->vm_start;
- unsigned long end = vma->vm_end;
+ unsigned long addr;
- while (addr < end) {
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
dst_pte = huge_pte_alloc(dst, addr);
if (!dst_pte)
goto nomem;
+ spin_lock(&src->page_table_lock);
src_pte = huge_pte_offset(src, addr);
- BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
- entry = *src_pte;
- ptepage = pte_page(entry);
- get_page(ptepage);
- add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
- set_huge_pte_at(dst, addr, dst_pte, entry);
- addr += HPAGE_SIZE;
+ if (src_pte && !pte_none(*src_pte)) {
+ entry = *src_pte;
+ ptepage = pte_page(entry);
+ get_page(ptepage);
+ add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ }
+ spin_unlock(&src->page_table_lock);
}
return 0;
@@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
page = pte_page(pte);
put_page(page);
+ add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE));
}
- add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
flush_tlb_range(vma, start, end);
}
@@ -393,6 +394,28 @@ out:
return ret;
}
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully. Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access)
+{
+ int ret = VM_FAULT_SIGBUS;
+ pte_t *pte;
+
+ spin_lock(&mm->page_table_lock);
+ pte = huge_pte_offset(mm, address);
+ if (pte && !pte_none(*pte))
+ ret = VM_FAULT_MINOR;
+ spin_unlock(&mm->page_table_lock);
+ return ret;
+}
+
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i)
@@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
BUG_ON(!is_vm_hugetlb_page(vma));
vpfn = vaddr/PAGE_SIZE;
+ spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
if (pages) {
@@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* indexing below to work. */
pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || pte_none(*pte));
+ /* the hugetlb file might have been truncated */
+ if (!pte || pte_none(*pte)) {
+ remainder = 0;
+ if (!i)
+ i = -EFAULT;
+ break;
+ }
page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
@@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
--remainder;
++i;
}
-
+ spin_unlock(&mm->page_table_lock);
*length = remainder;
*position = vaddr;
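
To make the "normal" truncation case from the comment above concrete, here is a hedged userspace sketch; the hugetlbfs mount point /mnt/huge, the file name and the 2 MiB huge page size are assumptions, and it presumes huge pages are configured and available. Once the file has been truncated, the next touch of the mapping reaches hugetlb_fault(), finds no pte, and the process gets SIGBUS, while follow_hugetlb_page() now answers the same situation with -EFAULT rather than a WARN_ON:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define HPAGE_SZ (2UL * 1024 * 1024)	/* assumed huge page size */

    int main(void)
    {
            /* /mnt/huge is a hypothetical hugetlbfs mount point. */
            int fd = open("/mnt/huge/demo", O_CREAT | O_RDWR, 0600);
            char *p;

            if (fd < 0)
                    return 1;
            p = mmap(NULL, HPAGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED)
                    return 1;
            p[0] = 1;               /* still backed by a real huge page: fine */

            ftruncate(fd, 0);       /* truncate the huge page away */
            p[0] = 2;               /* expected to be killed by SIGBUS here */
            return 0;
    }
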
diff --git a/mm/memory.c b/mm/memory.c
index ae8161f1f459..1db40e935e55 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
inc_page_state(pgfault);
- if (is_vm_hugetlb_page(vma))
- return VM_FAULT_SIGBUS; /* mapping truncation does this. */
+ if (unlikely(is_vm_hugetlb_page(vma)))
+ return hugetlb_fault(mm, vma, address, write_access);
/*
* We need the page table lock to synchronize with kswapd
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ea71e887bb6..64f9570cff56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
* PageDirty _after_ making sure that the page is freeable and
* not in use by anybody. (pagecache + us == 2)
*/
- if (page_count(page) != 2 || PageDirty(page)) {
- write_unlock_irq(&mapping->tree_lock);
- goto keep_locked;
- }
+ if (unlikely(page_count(page) != 2))
+ goto cannot_free;
+ smp_rmb();
+ if (unlikely(PageDirty(page)))
+ goto cannot_free;
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
__pagevec_release_nonlru(&freed_pvec);
continue;
+cannot_free:
+ write_unlock_irq(&mapping->tree_lock);
+ goto keep_locked;
+
activate_locked:
SetPageActive(page);
pgactivate++;
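
A brief, purely illustrative note on the one functional addition in this hunk: smp_rmb() orders the read made by page_count() before the read made by PageDirty(); it does not make either test atomic on its own. A generic sketch of that load-load ordering, with hypothetical variables (the matching store-side ordering belongs to whatever path re-dirties a page and is not part of this diff):

            /* reader side only; status, flag, READY and handle() are hypothetical */
            if (status == READY) {
                    smp_rmb();      /* the load of status is ordered before the load of flag */
                    if (flag)
                            handle();
            }

Without the barrier, the CPU (or the compiler) would be free to perform the second load before the first; that reordering is what the rewritten page_count()/PageDirty() sequence above now forbids.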