summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2005-10-29 18:16:40 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-29 21:40:42 -0700
commit4c21e2f2441dc5fbb957b030333f5a3f2d02dea7 (patch)
tree1f76d33bb1d76221c6424bc5fed080a4f91349a6
parentb38c6845b695141259019e2b7c0fe6c32a6e720d (diff)
downloadlinux-4c21e2f2441dc5fbb957b030333f5a3f2d02dea7.tar.gz
linux-4c21e2f2441dc5fbb957b030333f5a3f2d02dea7.tar.bz2
linux-4c21e2f2441dc5fbb957b030333f5a3f2d02dea7.zip
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area. This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.) In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled. Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later. There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/arm/mm/mm-armv.c1
-rw-r--r--arch/frv/mm/pgalloc.c4
-rw-r--r--arch/i386/mm/pgtable.c8
-rw-r--r--arch/um/kernel/skas/mmu.c1
-rw-r--r--fs/afs/file.c4
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/jfs/jfs_metapage.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c7
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/mm.h46
-rw-r--r--kernel/kexec.c4
-rw-r--r--mm/Kconfig13
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/memory.c24
-rw-r--r--mm/mremap.c11
-rw-r--r--mm/page_alloc.c16
-rw-r--r--mm/page_io.c6
-rw-r--r--mm/rmap.c4
-rw-r--r--mm/shmem.c22
-rw-r--r--mm/swap.c2
-rw-r--r--mm/swap_state.c8
-rw-r--r--mm/swapfile.c12
-rw-r--r--mm/vmscan.c2
23 files changed, 138 insertions, 79 deletions
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 60f3e039bac2..1221fdde1769 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -229,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
pte = pmd_page(*pmd);
pmd_clear(pmd);
dec_page_state(nr_page_table_pages);
+ pte_lock_deinit(pte);
pte_free(pte);
pmd_free(pmd);
free:
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 4eaec0f3525b..2c67dfe5a6b3 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
if (pgd_list)
pgd_list->private = (unsigned long) &page->index;
pgd_list = page;
- page->private = (unsigned long) &pgd_list;
+ set_page_private(page, (unsigned long)&pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *next, **pprev, *page = virt_to_page(pgd);
next = (struct page *) page->index;
- pprev = (struct page **) page->private;
+ pprev = (struct page **)page_private(page);
*pprev = next;
if (next)
next->private = (unsigned long) pprev;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index dcdce2c6c532..39c099f15b5f 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -188,19 +188,19 @@ static inline void pgd_list_add(pgd_t *pgd)
struct page *page = virt_to_page(pgd);
page->index = (unsigned long)pgd_list;
if (pgd_list)
- pgd_list->private = (unsigned long)&page->index;
+ set_page_private(pgd_list, (unsigned long)&page->index);
pgd_list = page;
- page->private = (unsigned long)&pgd_list;
+ set_page_private(page, (unsigned long)&pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *next, **pprev, *page = virt_to_page(pgd);
next = (struct page *)page->index;
- pprev = (struct page **)page->private;
+ pprev = (struct page **)page_private(page);
*pprev = next;
if (next)
- next->private = (unsigned long)pprev;
+ set_page_private(next, (unsigned long)pprev);
}
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 02cf36e0331a..9e5e39cea821 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -144,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)
if(!proc_mm || !ptrace_faultinfo){
free_page(mmu->id.stack);
+ pte_lock_deinit(virt_to_page(mmu->last_page_table));
pte_free_kernel((pte_t *) mmu->last_page_table);
dec_page_state(nr_page_table_pages);
#ifdef CONFIG_3_LEVEL_PGTABLES
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0d576987ec67..4975c9c193dd 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
cachefs_uncache_page(vnode->cache, page);
#endif
- pageio = (struct cachefs_page *) page->private;
- page->private = 0;
+ pageio = (struct cachefs_page *) page_private(page);
+ set_page_private(page, 0);
ClearPagePrivate(page);
if (pageio)
diff --git a/fs/buffer.c b/fs/buffer.c
index b1667986442f..2066e4cb700c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -96,7 +96,7 @@ static void
__clear_page_buffers(struct page *page)
{
ClearPagePrivate(page);
- page->private = 0;
+ set_page_private(page, 0);
page_cache_release(page);
}
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 26091a5f88d4..8a53981f9f27 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -86,7 +86,7 @@ struct meta_anchor {
atomic_t io_count;
struct metapage *mp[MPS_PER_PAGE];
};
-#define mp_anchor(page) ((struct meta_anchor *)page->private)
+#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
static inline struct metapage *page_to_mp(struct page *page, uint offset)
{
@@ -108,7 +108,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
if (!a)
return -ENOMEM;
memset(a, 0, sizeof(struct meta_anchor));
- page->private = (unsigned long)a;
+ set_page_private(page, (unsigned long)a);
SetPagePrivate(page);
kmap(page);
}
@@ -136,7 +136,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
a->mp[index] = NULL;
if (--a->mp_count == 0) {
kfree(a);
- page->private = 0;
+ set_page_private(page, 0);
ClearPagePrivate(page);
kunmap(page);
}
@@ -156,13 +156,13 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *))
#else
static inline struct metapage *page_to_mp(struct page *page, uint offset)
{
- return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+ return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}
static inline int insert_metapage(struct page *page, struct metapage *mp)
{
if (mp) {
- page->private = (unsigned long)mp;
+ set_page_private(page, (unsigned long)mp);
SetPagePrivate(page);
kmap(page);
}
@@ -171,7 +171,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
static inline void remove_metapage(struct page *page, struct metapage *mp)
{
- page->private = 0;
+ set_page_private(page, 0);
ClearPagePrivate(page);
kunmap(page);
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ba4767c04adf..4cd46abe8434 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -181,8 +181,9 @@ set_page_region(
size_t offset,
size_t length)
{
- page->private |= page_region_mask(offset, length);
- if (page->private == ~0UL)
+ set_page_private(page,
+ page_private(page) | page_region_mask(offset, length));
+ if (page_private(page) == ~0UL)
SetPageUptodate(page);
}
@@ -194,7 +195,7 @@ test_page_region(
{
unsigned long mask = page_region_mask(offset, length);
- return (mask && (page->private & mask) == mask);
+ return (mask && (page_private(page) & mask) == mask);
}
/*
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 88af42f5e04a..c937d6e65502 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page) \
({ \
- BUG_ON(!PagePrivate(page)); \
- ((struct buffer_head *)(page)->private); \
+ BUG_ON(!PagePrivate(page)); \
+ ((struct buffer_head *)page_private(page)); \
})
#define page_has_buffers(page) PagePrivate(page)
@@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
{
page_cache_get(page);
SetPagePrivate(page);
- page->private = (unsigned long)head;
+ set_page_private(page, (unsigned long)head);
}
static inline void get_bh(struct buffer_head *bh)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e8d1424153bb..8a514eca40d5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -226,13 +226,18 @@ struct page {
* to show when page is mapped
* & limit reverse map searches.
*/
- unsigned long private; /* Mapping-private opaque data:
+ union {
+ unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache
* When page is free, this indicates
* order in the buddy system.
*/
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+ spinlock_t ptl;
+#endif
+ } u;
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
@@ -260,6 +265,9 @@ struct page {
#endif /* WANT_PAGE_VIRTUAL */
};
+#define page_private(page) ((page)->u.private)
+#define set_page_private(page, v) ((page)->u.private = (v))
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
#ifdef CONFIG_HUGETLB_PAGE
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
{
- if (PageCompound(p))
- p = (struct page *)p->private;
- return atomic_read(&(p)->_count) + 1;
+ if (PageCompound(page))
+ page = (struct page *)page_private(page);
+ return atomic_read(&page->_count) + 1;
}
static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
- page = (struct page *)page->private;
+ page = (struct page *)page_private(page);
atomic_inc(&page->_count);
}
@@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
static inline pgoff_t page_index(struct page *page)
{
if (unlikely(PageSwapCache(page)))
- return page->private;
+ return page_private(page);
return page->index;
}
@@ -779,9 +787,31 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
}
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page) &((page)->u.ptl)
+#define pte_lock_init(_page) do { \
+ spin_lock_init(__pte_lockptr(_page)); \
+} while (0)
+#define pte_lock_deinit(page) ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page) do {} while (0)
+#define pte_lock_deinit(page) do {} while (0)
+#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
({ \
- spinlock_t *__ptl = &(mm)->page_table_lock; \
+ spinlock_t *__ptl = pte_lockptr(mm, pmd); \
pte_t *__pte = pte_offset_map(pmd, address); \
*(ptlp) = __ptl; \
spin_lock(__ptl); \
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 36c5d9cd4cc1..2c95848fbce8 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -334,7 +334,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
if (pages) {
unsigned int count, i;
pages->mapping = NULL;
- pages->private = order;
+ set_page_private(pages, order);
count = 1 << order;
for (i = 0; i < count; i++)
SetPageReserved(pages + i);
@@ -347,7 +347,7 @@ static void kimage_free_pages(struct page *page)
{
unsigned int order, count, i;
- order = page->private;
+ order = page_private(page);
count = 1 << order;
for (i = 0; i < count; i++)
ClearPageReserved(page + i);
diff --git a/mm/Kconfig b/mm/Kconfig
index 391ffc54d136..f35a550ba4b9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -111,3 +111,16 @@ config SPARSEMEM_STATIC
config SPARSEMEM_EXTREME
def_bool y
depends on SPARSEMEM && !SPARSEMEM_STATIC
+
+# Heavily threaded applications may benefit from splitting the mm-wide
+# page_table_lock, so that faults on different parts of the user address
+# space can be handled with less contention: split it at this NR_CPUS.
+# Default to 4 for wider testing, though 8 might be more appropriate.
+# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+# PA-RISC's debug spinlock_t is too large for the 32-bit struct page.
+#
+config SPLIT_PTLOCK_CPUS
+ int
+ default "4096" if ARM && !CPU_CACHE_VIPT
+ default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT
+ default "4"
diff --git a/mm/filemap.c b/mm/filemap.c
index 8aa344e88489..f560b41c8f61 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -152,7 +152,7 @@ static int sync_page(void *word)
* in the ->sync_page() methods make essential use of the
* page_mapping(), merely passing the page down to the backing
* device's unplug functions when it's non-NULL, which in turn
- * ignore it for all cases but swap, where only page->private is
+ * ignore it for all cases but swap, where only page_private(page) is
* of interest. When page_mapping() does go NULL, the entire
* call stack gracefully ignores the page and returns.
* -- wli
diff --git a/mm/memory.c b/mm/memory.c
index 8461e2dd91d7..e9ef599498b5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -114,6 +114,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
struct page *page = pmd_page(*pmd);
pmd_clear(pmd);
+ pte_lock_deinit(page);
pte_free_tlb(tlb, page);
dec_page_state(nr_page_table_pages);
tlb->mm->nr_ptes--;
@@ -294,10 +295,12 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
if (!new)
return -ENOMEM;
+ pte_lock_init(new);
spin_lock(&mm->page_table_lock);
- if (pmd_present(*pmd)) /* Another has populated it */
+ if (pmd_present(*pmd)) { /* Another has populated it */
+ pte_lock_deinit(new);
pte_free(new);
- else {
+ } else {
mm->nr_ptes++;
inc_page_state(nr_page_table_pages);
pmd_populate(mm, pmd, new);
@@ -432,7 +435,7 @@ again:
if (!dst_pte)
return -ENOMEM;
src_pte = pte_offset_map_nested(src_pmd, addr);
- src_ptl = &src_mm->page_table_lock;
+ src_ptl = pte_lockptr(src_mm, src_pmd);
spin_lock(src_ptl);
do {
@@ -1194,15 +1197,16 @@ EXPORT_SYMBOL(remap_pfn_range);
* (but do_wp_page is only called after already making such a check;
* and do_anonymous_page and do_no_page can safely check later on).
*/
-static inline int pte_unmap_same(struct mm_struct *mm,
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
pte_t *page_table, pte_t orig_pte)
{
int same = 1;
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
if (sizeof(pte_t) > sizeof(unsigned long)) {
- spin_lock(&mm->page_table_lock);
+ spinlock_t *ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
same = pte_same(*page_table, orig_pte);
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
}
#endif
pte_unmap(page_table);
@@ -1655,7 +1659,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t pte;
int ret = VM_FAULT_MINOR;
- if (!pte_unmap_same(mm, page_table, orig_pte))
+ if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
goto out;
entry = pte_to_swp_entry(orig_pte);
@@ -1773,7 +1777,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_get(page);
entry = mk_pte(page, vma->vm_page_prot);
- ptl = &mm->page_table_lock;
+ ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (!pte_none(*page_table))
goto release;
@@ -1934,7 +1938,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff;
int err;
- if (!pte_unmap_same(mm, page_table, orig_pte))
+ if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
return VM_FAULT_MINOR;
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
@@ -1992,7 +1996,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
pte, pmd, write_access, entry);
}
- ptl = &mm->page_table_lock;
+ ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
diff --git a/mm/mremap.c b/mm/mremap.c
index 8de77b632a20..b535438c363c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -72,7 +72,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct address_space *mapping = NULL;
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
- spinlock_t *old_ptl;
+ spinlock_t *old_ptl, *new_ptl;
if (vma->vm_file) {
/*
@@ -88,8 +88,15 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
new_vma->vm_truncate_count = 0;
}
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * pte locks because exclusive mmap_sem prevents deadlock.
+ */
old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
new_pte = pte_offset_map_nested(new_pmd, new_addr);
+ new_ptl = pte_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock(new_ptl);
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
new_pte++, new_addr += PAGE_SIZE) {
@@ -101,6 +108,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
set_pte_at(mm, new_addr, new_pte, pte);
}
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
pte_unmap_nested(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
if (mapping)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0541288ebf4b..a2995a5d012c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -154,7 +154,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
struct page *p = page + i;
SetPageCompound(p);
- p->private = (unsigned long)page;
+ set_page_private(p, (unsigned long)page);
}
}
@@ -174,7 +174,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
if (!PageCompound(p))
bad_page(__FUNCTION__, page);
- if (p->private != (unsigned long)page)
+ if (page_private(p) != (unsigned long)page)
bad_page(__FUNCTION__, page);
ClearPageCompound(p);
}
@@ -187,18 +187,18 @@ static void destroy_compound_page(struct page *page, unsigned long order)
* So, we don't need atomic page->flags operations here.
*/
static inline unsigned long page_order(struct page *page) {
- return page->private;
+ return page_private(page);
}
static inline void set_page_order(struct page *page, int order) {
- page->private = order;
+ set_page_private(page, order);
__SetPagePrivate(page);
}
static inline void rmv_page_order(struct page *page)
{
__ClearPagePrivate(page);
- page->private = 0;
+ set_page_private(page, 0);
}
/*
@@ -238,7 +238,7 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* (a) the buddy is free &&
* (b) the buddy is on the buddy system &&
* (c) a page and its buddy have the same order.
- * for recording page's order, we use page->private and PG_private.
+ * for recording page's order, we use page_private(page) and PG_private.
*
*/
static inline int page_is_buddy(struct page *page, int order)
@@ -264,7 +264,7 @@ static inline int page_is_buddy(struct page *page, int order)
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
* free pages of length of (1 << order) and marked with PG_Private.Page's
- * order is recorded in page->private field.
+ * order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
* free, the remainder of the region must be split into blocks.
@@ -463,7 +463,7 @@ static void prep_new_page(struct page *page, int order)
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
- page->private = 0;
+ set_page_private(page, 0);
set_page_refs(page, order);
kernel_map_pages(page, 1 << order, 1);
}
diff --git a/mm/page_io.c b/mm/page_io.c
index 330e00d6db00..bb2b0d53889c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -91,7 +91,8 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
unlock_page(page);
goto out;
}
- bio = get_swap_bio(GFP_NOIO, page->private, page, end_swap_bio_write);
+ bio = get_swap_bio(GFP_NOIO, page_private(page), page,
+ end_swap_bio_write);
if (bio == NULL) {
set_page_dirty(page);
unlock_page(page);
@@ -115,7 +116,8 @@ int swap_readpage(struct file *file, struct page *page)
BUG_ON(!PageLocked(page));
ClearPageUptodate(page);
- bio = get_swap_bio(GFP_KERNEL, page->private, page, end_swap_bio_read);
+ bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
+ end_swap_bio_read);
if (bio == NULL) {
unlock_page(page);
ret = -ENOMEM;
diff --git a/mm/rmap.c b/mm/rmap.c
index a84bdfe582c0..a33e779d1bd8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -274,7 +274,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
return NULL;
}
- ptl = &mm->page_table_lock;
+ ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
*ptlp = ptl;
@@ -550,7 +550,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
update_hiwater_rss(mm);
if (PageAnon(page)) {
- swp_entry_t entry = { .val = page->private };
+ swp_entry_t entry = { .val = page_private(page) };
/*
* Store the swap location in the pte.
* See handle_pte_fault() ...
diff --git a/mm/shmem.c b/mm/shmem.c
index 37777f4c11f8..dc25565a61e9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -71,9 +71,6 @@
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
-/* Keep swapped page count in private field of indirect struct page */
-#define nr_swapped private
-
/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
SGP_QUICK, /* don't try more than file page cache lookup */
@@ -324,8 +321,10 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
entry->val = value;
info->swapped += incdec;
- if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
- kmap_atomic_to_page(entry)->nr_swapped += incdec;
+ if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
+ struct page *page = kmap_atomic_to_page(entry);
+ set_page_private(page, page_private(page) + incdec);
+ }
}
/*
@@ -368,9 +367,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
spin_unlock(&info->lock);
page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
- if (page) {
- page->nr_swapped = 0;
- }
+ if (page)
+ set_page_private(page, 0);
spin_lock(&info->lock);
if (!page) {
@@ -561,7 +559,7 @@ static void shmem_truncate(struct inode *inode)
diroff = 0;
}
subdir = dir[diroff];
- if (subdir && subdir->nr_swapped) {
+ if (subdir && page_private(subdir)) {
size = limit - idx;
if (size > ENTRIES_PER_PAGE)
size = ENTRIES_PER_PAGE;
@@ -572,10 +570,10 @@ static void shmem_truncate(struct inode *inode)
nr_swaps_freed += freed;
if (offset)
spin_lock(&info->lock);
- subdir->nr_swapped -= freed;
+ set_page_private(subdir, page_private(subdir) - freed);
if (offset)
spin_unlock(&info->lock);
- BUG_ON(subdir->nr_swapped > offset);
+ BUG_ON(page_private(subdir) > offset);
}
if (offset)
offset = 0;
@@ -743,7 +741,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
dir = shmem_dir_map(subdir);
}
subdir = *dir;
- if (subdir && subdir->nr_swapped) {
+ if (subdir && page_private(subdir)) {
ptr = shmem_swp_map(subdir);
size = limit - idx;
if (size > ENTRIES_PER_PAGE)
diff --git a/mm/swap.c b/mm/swap.c
index 21d15f99805c..b89512877ec2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -39,7 +39,7 @@ int page_cluster;
void put_page(struct page *page)
{
if (unlikely(PageCompound(page))) {
- page = (struct page *)page->private;
+ page = (struct page *)page_private(page);
if (put_page_testzero(page)) {
void (*dtor)(struct page *page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 132164f7d0a7..cafc1edcbeba 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -83,7 +83,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
page_cache_get(page);
SetPageLocked(page);
SetPageSwapCache(page);
- page->private = entry.val;
+ set_page_private(page, entry.val);
total_swapcache_pages++;
pagecache_acct(1);
}
@@ -126,8 +126,8 @@ void __delete_from_swap_cache(struct page *page)
BUG_ON(PageWriteback(page));
BUG_ON(PagePrivate(page));
- radix_tree_delete(&swapper_space.page_tree, page->private);
- page->private = 0;
+ radix_tree_delete(&swapper_space.page_tree, page_private(page));
+ set_page_private(page, 0);
ClearPageSwapCache(page);
total_swapcache_pages--;
pagecache_acct(-1);
@@ -197,7 +197,7 @@ void delete_from_swap_cache(struct page *page)
{
swp_entry_t entry;
- entry.val = page->private;
+ entry.val = page_private(page);
write_lock_irq(&swapper_space.tree_lock);
__delete_from_swap_cache(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 510f0039b000..8970c0b74194 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -61,7 +61,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
swp_entry_t entry;
down_read(&swap_unplug_sem);
- entry.val = page->private;
+ entry.val = page_private(page);
if (PageSwapCache(page)) {
struct block_device *bdev = swap_info[swp_type(entry)].bdev;
struct backing_dev_info *bdi;
@@ -69,8 +69,8 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
/*
* If the page is removed from swapcache from under us (with a
* racy try_to_unuse/swapoff) we need an additional reference
- * count to avoid reading garbage from page->private above. If
- * the WARN_ON triggers during a swapoff it maybe the race
+ * count to avoid reading garbage from page_private(page) above.
+ * If the WARN_ON triggers during a swapoff it maybe the race
* condition and it's harmless. However if it triggers without
* swapoff it signals a problem.
*/
@@ -294,7 +294,7 @@ static inline int page_swapcount(struct page *page)
struct swap_info_struct *p;
swp_entry_t entry;
- entry.val = page->private;
+ entry.val = page_private(page);
p = swap_info_get(entry);
if (p) {
/* Subtract the 1 for the swap cache itself */
@@ -339,7 +339,7 @@ int remove_exclusive_swap_page(struct page *page)
if (page_count(page) != 2) /* 2: us + cache */
return 0;
- entry.val = page->private;
+ entry.val = page_private(page);
p = swap_info_get(entry);
if (!p)
return 0;
@@ -1042,7 +1042,7 @@ int page_queue_congested(struct page *page)
BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */
if (PageSwapCache(page)) {
- swp_entry_t entry = { .val = page->private };
+ swp_entry_t entry = { .val = page_private(page) };
struct swap_info_struct *sis;
sis = get_swap_info_struct(swp_type(entry));
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 41d1064aabfb..135bf8ca96ee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -521,7 +521,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
- swp_entry_t swap = { .val = page->private };
+ swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
write_unlock_irq(&mapping->tree_lock);
swap_free(swap);