summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c136
1 files changed, 103 insertions, 33 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e293c58bea58..d0a240fbb8bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
static void bad_page(struct page *page)
{
- void *pc = page_get_page_cgroup(page);
-
printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
current->comm, page, (int)(2*sizeof(unsigned long)),
(unsigned long)page->flags, page->mapping,
page_mapcount(page), page_count(page));
- if (pc) {
- printk(KERN_EMERG "cgroup:%p\n", pc);
- page_reset_bad_cgroup(page);
- }
+
printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
KERN_EMERG "Backtrace:\n");
dump_stack();
@@ -268,13 +263,14 @@ void prep_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
+ struct page *p = page + 1;
set_compound_page_dtor(page, free_compound_page);
set_compound_order(page, order);
__SetPageHead(page);
- for (i = 1; i < nr_pages; i++) {
- struct page *p = page + i;
-
+ for (i = 1; i < nr_pages; i++, p++) {
+ if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
+ p = pfn_to_page(page_to_pfn(page) + i);
__SetPageTail(p);
p->first_page = page;
}
@@ -284,6 +280,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
+ struct page *p = page + 1;
if (unlikely(compound_order(page) != order))
bad_page(page);
@@ -291,8 +288,9 @@ static void destroy_compound_page(struct page *page, unsigned long order)
if (unlikely(!PageHead(page)))
bad_page(page);
__ClearPageHead(page);
- for (i = 1; i < nr_pages; i++) {
- struct page *p = page + i;
+ for (i = 1; i < nr_pages; i++, p++) {
+ if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
+ p = pfn_to_page(page_to_pfn(page) + i);
if (unlikely(!PageTail(p) |
(p->first_page != page)))
@@ -451,14 +449,16 @@ static inline void __free_one_page(struct page *page,
static inline int free_pages_check(struct page *page)
{
+ free_page_mlock(page);
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
- (page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
+ if (PageSwapBacked(page))
+ __ClearPageSwapBacked(page);
/*
* For now, we report if PG_reserved was found set, but do not
* clear it, and do not free the page. But we shall soon need
@@ -597,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
- (page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
bad_page(page);
@@ -611,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
1 << PG_referenced | 1 << PG_arch_1 |
- 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+ 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+ | 1 << PG_mlocked
+#endif
+ );
set_page_private(page, 0);
set_page_refcounted(page);
@@ -1859,10 +1862,21 @@ void show_free_areas(void)
}
}
- printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+ printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+ " inactive_file:%lu"
+//TODO: check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+ " unevictable:%lu"
+#endif
+ " dirty:%lu writeback:%lu unstable:%lu\n"
" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
- global_page_state(NR_ACTIVE),
- global_page_state(NR_INACTIVE),
+ global_page_state(NR_ACTIVE_ANON),
+ global_page_state(NR_ACTIVE_FILE),
+ global_page_state(NR_INACTIVE_ANON),
+ global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ global_page_state(NR_UNEVICTABLE),
+#endif
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
@@ -1885,8 +1899,13 @@ void show_free_areas(void)
" min:%lukB"
" low:%lukB"
" high:%lukB"
- " active:%lukB"
- " inactive:%lukB"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+ " unevictable:%lukB"
+#endif
" present:%lukB"
" pages_scanned:%lu"
" all_unreclaimable? %s"
@@ -1896,8 +1915,13 @@ void show_free_areas(void)
K(zone->pages_min),
K(zone->pages_low),
K(zone->pages_high),
- K(zone_page_state(zone, NR_ACTIVE)),
- K(zone_page_state(zone, NR_INACTIVE)),
+ K(zone_page_state(zone, NR_ACTIVE_ANON)),
+ K(zone_page_state(zone, NR_INACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ACTIVE_FILE)),
+ K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
K(zone->present_pages),
zone->pages_scanned,
(zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3407,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
pgdat->kswapd_max_order = 0;
+ pgdat_page_cgroup_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, memmap_pages;
+ enum lru_list l;
size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3425,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
if (realsize >= memmap_pages) {
realsize -= memmap_pages;
- mminit_dprintk(MMINIT_TRACE, "memmap_init",
- "%s zone: %lu pages used for memmap\n",
+ printk(KERN_DEBUG
+ " %s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
@@ -3436,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
/* Account for reserved pages */
if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
- mminit_dprintk(MMINIT_TRACE, "memmap_init",
- "%s zone: %lu pages reserved\n",
+ printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
zone_names[0], dma_reserve);
}
@@ -3462,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->prev_priority = DEF_PRIORITY;
zone_pcp_init(zone);
- INIT_LIST_HEAD(&zone->active_list);
- INIT_LIST_HEAD(&zone->inactive_list);
- zone->nr_scan_active = 0;
- zone->nr_scan_inactive = 0;
+ for_each_lru(l) {
+ INIT_LIST_HEAD(&zone->lru[l].list);
+ zone->lru[l].nr_scan = 0;
+ }
+ zone->recent_rotated[0] = 0;
+ zone->recent_rotated[1] = 0;
+ zone->recent_scanned[0] = 0;
+ zone->recent_scanned[1] = 0;
zap_zone_vm_stats(zone);
zone->flags = 0;
if (!size)
@@ -3949,7 +3978,7 @@ static void check_for_regular_memory(pg_data_t *pgdat)
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
unsigned long nid;
- enum zone_type i;
+ int i;
/* Sort early_node_map as initialisation assumes it is sorted */
sort_node_map();
@@ -4207,7 +4236,7 @@ void setup_per_zone_pages_min(void)
for_each_zone(zone) {
u64 tmp;
- spin_lock_irqsave(&zone->lru_lock, flags);
+ spin_lock_irqsave(&zone->lock, flags);
tmp = (u64)pages_min * zone->present_pages;
do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
@@ -4239,13 +4268,53 @@ void setup_per_zone_pages_min(void)
zone->pages_low = zone->pages_min + (tmp >> 2);
zone->pages_high = zone->pages_min + (tmp >> 1);
setup_zone_migrate_reserve(zone);
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
/* update totalreserve_pages */
calculate_totalreserve_pages();
}
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total target max
+ * memory ratio inactive anon
+ * -------------------------------------
+ * 10MB 1 5MB
+ * 100MB 1 50MB
+ * 1GB 3 250MB
+ * 10GB 10 0.9GB
+ * 100GB 31 3GB
+ * 1TB 101 10GB
+ * 10TB 320 32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ unsigned int gb, ratio;
+
+ /* Zone size in gigabytes */
+ gb = zone->present_pages >> (30 - PAGE_SHIFT);
+ ratio = int_sqrt(10 * gb);
+ if (!ratio)
+ ratio = 1;
+
+ zone->inactive_ratio = ratio;
+ }
+}
+
/*
* Initialise min_free_kbytes.
*
@@ -4283,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
min_free_kbytes = 65536;
setup_per_zone_pages_min();
setup_per_zone_lowmem_reserve();
+ setup_per_zone_inactive_ratio();
return 0;
}
module_init(init_per_zone_pages_min)