hugetlbfs: allocate structures for reservation tracking outside of spinlocks

In the normal case, hugetlbfs reserves hugepages at map time so that the pages exist for future faults. A struct file_region is used to track when reservations have been consumed and where. These file_regions are allocated as necessary with kmalloc() which can sleep with the mm->page_table_lock held. This is wrong and triggers may-sleep warning when PREEMPT is enabled. Updates to the underlying file_region are done in two phases. The first phase prepares the region for the change, allocating any necessary memory, without actually making the change. The second phase actually commits the change. This patch makes use of this by checking the reservations before the page_table_lock is taken; triggering any necessary allocations. This may then be safely repeated within the locks without any allocations being required. Credit to Mel Gorman for diagnosing this failure and initial versions of the patch. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Tested-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Andy Whitcroft <apw@shadowen.org> 2008-08-12 15:08:47 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-08-12 16:07:28 -0700
commit: 57303d80175e10056bf51206f9961d586f02f967 (patch)
tree: 3979c1d3e6bf154227ef94245c5a7b6141512211
parent: ff1a4a7b14ae146142b1c93a001304caf662ae13 (diff)
download: linux-57303d80175e10056bf51206f9961d586f02f967.tar.gz
linux-57303d80175e10056bf51206f9961d586f02f967.tar.bz2
linux-57303d80175e10056bf51206f9961d586f02f967.zip
1 files changed, 35 insertions, 9 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 92155db888b9..4c97c174e2e1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1942,6 +1942,15 @@ retry:
 			lock_page(page);
 	}
 
+	/*
+	 * If we are going to COW a private mapping later, we examine the
+	 * pending reservations for this page now. This will ensure that
+	 * any allocations necessary to record that reservation occur outside
+	 * the spinlock.
+	 */
+	if (write_access && !(vma->vm_flags & VM_SHARED))
+		vma_needs_reservation(h, vma, address);
+
 	spin_lock(&mm->page_table_lock);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
@@ -1978,6 +1987,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	struct page *pagecache_page = NULL;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
 
@@ -2000,19 +2010,35 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	ret = 0;
 
+	/*
+	 * If we are going to COW the mapping later, we examine the pending
+	 * reservations for this page now. This will ensure that any
+	 * allocations necessary to record that reservation occur outside the
+	 * spinlock. For private mappings, we also lookup the pagecache
+	 * page now as it is used to determine if a reservation has been
+	 * consumed.
+	 */
+	if (write_access && !pte_write(entry)) {
+		vma_needs_reservation(h, vma, address);
+
+		if (!(vma->vm_flags & VM_SHARED))
+			pagecache_page = hugetlbfs_pagecache_page(h,
+								vma, address);
+	}
+
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (likely(pte_same(entry, huge_ptep_get(ptep))))
-		if (write_access && !pte_write(entry)) {
-			struct page *page;
-			page = hugetlbfs_pagecache_page(h, vma, address);
-			ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
-			if (page) {
-				unlock_page(page);
-				put_page(page);
-			}
-		}
+		if (write_access && !pte_write(entry))
+			ret = hugetlb_cow(mm, vma, address, ptep, entry,
+							pagecache_page);
 	spin_unlock(&mm->page_table_lock);
+
+	if (pagecache_page) {
+		unlock_page(pagecache_page);
+		put_page(pagecache_page);
+	}
+
 	mutex_unlock(&hugetlb_instantiation_mutex);
 
 	return ret;
author	Andy Whitcroft <apw@shadowen.org>	2008-08-12 15:08:47 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-08-12 16:07:28 -0700
commit	57303d80175e10056bf51206f9961d586f02f967 (patch)
tree	3979c1d3e6bf154227ef94245c5a7b6141512211
parent	ff1a4a7b14ae146142b1c93a001304caf662ae13 (diff)
download	linux-57303d80175e10056bf51206f9961d586f02f967.tar.gz linux-57303d80175e10056bf51206f9961d586f02f967.tar.bz2 linux-57303d80175e10056bf51206f9961d586f02f967.zip