summaryrefslogtreecommitdiffstats
path: root/mm/vmstat.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--mm/vmstat.c271
1 files changed, 263 insertions, 8 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6051fbab67ba..7759941d4e77 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -12,9 +12,11 @@
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
+#include <linux/math64.h>
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
@@ -139,7 +141,8 @@ static void refresh_zone_stat_thresholds(void)
threshold = calculate_threshold(zone);
for_each_online_cpu(cpu)
- zone_pcp(zone, cpu)->stat_threshold = threshold;
+ per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+ = threshold;
}
}
@@ -149,7 +152,8 @@ static void refresh_zone_stat_thresholds(void)
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
int delta)
{
- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+ struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
+
s8 *p = pcp->vm_stat_diff + item;
long x;
@@ -202,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
*/
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+ struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
s8 *p = pcp->vm_stat_diff + item;
(*p)++;
@@ -223,7 +227,7 @@ EXPORT_SYMBOL(__inc_zone_page_state);
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+ struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
s8 *p = pcp->vm_stat_diff + item;
(*p)--;
@@ -300,7 +304,7 @@ void refresh_cpu_vm_stats(int cpu)
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
- p = zone_pcp(zone, cpu);
+ p = per_cpu_ptr(zone->pageset, cpu);
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
if (p->vm_stat_diff[i]) {
@@ -376,7 +380,86 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z)
}
#endif
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_COMPACTION
+struct contig_page_info {
+ unsigned long free_pages;
+ unsigned long free_blocks_total;
+ unsigned long free_blocks_suitable;
+};
+
+/*
+ * Calculate the number of free pages in a zone, how many contiguous
+ * pages are free and how many are large enough to satisfy an allocation of
+ * the target size. Note that this function makes no attempt to estimate
+ * how many suitable free blocks there *might* be if MOVABLE pages were
+ * migrated. Calculating that is possible, but expensive and can be
+ * figured out from userspace
+ */
+static void fill_contig_page_info(struct zone *zone,
+ unsigned int suitable_order,
+ struct contig_page_info *info)
+{
+ unsigned int order;
+
+ info->free_pages = 0;
+ info->free_blocks_total = 0;
+ info->free_blocks_suitable = 0;
+
+ for (order = 0; order < MAX_ORDER; order++) {
+ unsigned long blocks;
+
+ /* Count number of free blocks */
+ blocks = zone->free_area[order].nr_free;
+ info->free_blocks_total += blocks;
+
+ /* Count free base pages */
+ info->free_pages += blocks << order;
+
+ /* Count the suitable free blocks */
+ if (order >= suitable_order)
+ info->free_blocks_suitable += blocks <<
+ (order - suitable_order);
+ }
+}
+
+/*
+ * A fragmentation index only makes sense if an allocation of a requested
+ * size would fail. If that is true, the fragmentation index indicates
+ * whether external fragmentation or a lack of memory was the problem.
+ * The value can be used to determine if page reclaim or compaction
+ * should be used
+ */
+static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
+{
+ unsigned long requested = 1UL << order;
+
+ if (!info->free_blocks_total)
+ return 0;
+
+ /* Fragmentation index only makes sense when a request would fail */
+ if (info->free_blocks_suitable)
+ return -1000;
+
+ /*
+ * Index is between 0 and 1 so return within 3 decimal places
+ *
+ * 0 => allocation would fail due to lack of memory
+ * 1 => allocation would fail due to fragmentation
+ */
+ return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
+}
+
+/* Same as __fragmentation index but allocs contig_page_info on stack */
+int fragmentation_index(struct zone *zone, unsigned int order)
+{
+ struct contig_page_info info;
+
+ fill_contig_page_info(zone, order, &info);
+ return __fragmentation_index(order, &info);
+}
+#endif
+
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -429,7 +512,9 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
spin_unlock_irqrestore(&zone->lock, flags);
}
}
+#endif
+#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
struct zone *zone)
{
@@ -690,6 +775,16 @@ static const char * const vmstat_text[] = {
"allocstall",
"pgrotated",
+
+#ifdef CONFIG_COMPACTION
+ "compact_blocks_moved",
+ "compact_pages_moved",
+ "compact_pagemigrate_failed",
+ "compact_stall",
+ "compact_fail",
+ "compact_success",
+#endif
+
#ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",
@@ -741,7 +836,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
for_each_online_cpu(i) {
struct per_cpu_pageset *pageset;
- pageset = zone_pcp(zone, i);
+ pageset = per_cpu_ptr(zone->pageset, i);
seq_printf(m,
"\n cpu: %i"
"\n count: %i"
@@ -761,7 +856,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n prev_priority: %i"
"\n start_pfn: %lu"
"\n inactive_ratio: %u",
- zone_is_all_unreclaimable(zone),
+ zone->all_unreclaimable,
zone->prev_priority,
zone->zone_start_pfn,
zone->inactive_ratio);
@@ -906,6 +1001,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
start_cpu_timer(cpu);
+ node_set_state(cpu_to_node(cpu), N_CPU);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
@@ -950,3 +1046,162 @@ static int __init setup_vmstat(void)
return 0;
}
module_init(setup_vmstat)
+
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
+#include <linux/debugfs.h>
+
+static struct dentry *extfrag_debug_root;
+
+/*
+ * Return an index indicating how much of the available free memory is
+ * unusable for an allocation of the requested size.
+ */
+static int unusable_free_index(unsigned int order,
+ struct contig_page_info *info)
+{
+ /* No free memory is interpreted as all free memory is unusable */
+ if (info->free_pages == 0)
+ return 1000;
+
+ /*
+ * Index should be a value between 0 and 1. Return a value to 3
+ * decimal places.
+ *
+ * 0 => no fragmentation
+ * 1 => high fragmentation
+ */
+ return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
+
+}
+
+static void unusable_show_print(struct seq_file *m,
+ pg_data_t *pgdat, struct zone *zone)
+{
+ unsigned int order;
+ int index;
+ struct contig_page_info info;
+
+ seq_printf(m, "Node %d, zone %8s ",
+ pgdat->node_id,
+ zone->name);
+ for (order = 0; order < MAX_ORDER; ++order) {
+ fill_contig_page_info(zone, order, &info);
+ index = unusable_free_index(order, &info);
+ seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
+ }
+
+ seq_putc(m, '\n');
+}
+
+/*
+ * Display unusable free space index
+ *
+ * The unusable free space index measures how much of the available free
+ * memory cannot be used to satisfy an allocation of a given size and is a
+ * value between 0 and 1. The higher the value, the more of free memory is
+ * unusable and by implication, the worse the external fragmentation is. This
+ * can be expressed as a percentage by multiplying by 100.
+ */
+static int unusable_show(struct seq_file *m, void *arg)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+
+ /* check memoryless node */
+ if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
+ return 0;
+
+ walk_zones_in_node(m, pgdat, unusable_show_print);
+
+ return 0;
+}
+
+static const struct seq_operations unusable_op = {
+ .start = frag_start,
+ .next = frag_next,
+ .stop = frag_stop,
+ .show = unusable_show,
+};
+
+static int unusable_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &unusable_op);
+}
+
+static const struct file_operations unusable_file_ops = {
+ .open = unusable_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static void extfrag_show_print(struct seq_file *m,
+ pg_data_t *pgdat, struct zone *zone)
+{
+ unsigned int order;
+ int index;
+
+ /* Alloc on stack as interrupts are disabled for zone walk */
+ struct contig_page_info info;
+
+ seq_printf(m, "Node %d, zone %8s ",
+ pgdat->node_id,
+ zone->name);
+ for (order = 0; order < MAX_ORDER; ++order) {
+ fill_contig_page_info(zone, order, &info);
+ index = __fragmentation_index(order, &info);
+ seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
+ }
+
+ seq_putc(m, '\n');
+}
+
+/*
+ * Display fragmentation index for orders that allocations would fail for
+ */
+static int extfrag_show(struct seq_file *m, void *arg)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+
+ walk_zones_in_node(m, pgdat, extfrag_show_print);
+
+ return 0;
+}
+
+static const struct seq_operations extfrag_op = {
+ .start = frag_start,
+ .next = frag_next,
+ .stop = frag_stop,
+ .show = extfrag_show,
+};
+
+static int extfrag_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &extfrag_op);
+}
+
+static const struct file_operations extfrag_file_ops = {
+ .open = extfrag_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init extfrag_debug_init(void)
+{
+ extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
+ if (!extfrag_debug_root)
+ return -ENOMEM;
+
+ if (!debugfs_create_file("unusable_index", 0444,
+ extfrag_debug_root, NULL, &unusable_file_ops))
+ return -ENOMEM;
+
+ if (!debugfs_create_file("extfrag_index", 0444,
+ extfrag_debug_root, NULL, &extfrag_file_ops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+module_init(extfrag_debug_init);
+#endif