summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 19:25:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 19:25:39 -0700
commitac694dbdbc403c00e2c14d10bc7b8412cc378259 (patch)
treee37328cfbeaf43716dd5914cad9179e57e84df76 /include
parenta40a1d3d0a2fd613fdec6d89d3c053268ced76ed (diff)
parent437ea90cc3afdca5229b41c6b1d38c4842756cb9 (diff)
downloadlinux-ac694dbdbc403c00e2c14d10bc7b8412cc378259.tar.gz
linux-ac694dbdbc403c00e2c14d10bc7b8412cc378259.tar.bz2
linux-ac694dbdbc403c00e2c14d10bc7b8412cc378259.zip
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches: - MM - a few random fixes - a couple of RTC leftovers * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits) rtc/rtc-88pm80x: remove unneed devm_kfree rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables tmpfs: distribute interleave better across nodes mm: remove redundant initialization mm: warn if pg_data_t isn't initialized with zero mips: zero out pg_data_t when it's allocated memcg: gix memory accounting scalability in shrink_page_list mm/sparse: remove index_init_lock mm/sparse: more checks on mem_section number mm/sparse: optimize sparse_index_alloc memcg: add mem_cgroup_from_css() helper memcg: further prevent OOM with too many dirty pages memcg: prevent OOM with too many dirty pages mm: mmu_notifier: fix freed page still mapped in secondary MMU mm: memcg: only check anon swapin page charges for swap cache mm: memcg: only check swap cache pages for repeated charging mm: memcg: split swapin charge function into private and public part mm: memcg: remove needless !mm fixup to init_mm when charging mm: memcg: remove unneeded shmem charge type ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev.h3
-rw-r--r--include/linux/blk_types.h2
-rw-r--r--include/linux/cgroup_subsys.h8
-rw-r--r--include/linux/compaction.h4
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/gfp.h13
-rw-r--r--include/linux/highmem.h7
-rw-r--r--include/linux/hugetlb.h50
-rw-r--r--include/linux/hugetlb_cgroup.h126
-rw-r--r--include/linux/memcontrol.h34
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h31
-rw-r--r--include/linux/mm_types.h9
-rw-r--r--include/linux/mmzone.h26
-rw-r--r--include/linux/nfs_fs.h4
-rw-r--r--include/linux/oom.h21
-rw-r--r--include/linux/page-flags.h29
-rw-r--r--include/linux/page-isolation.h13
-rw-r--r--include/linux/page_cgroup.h10
-rw-r--r--include/linux/pagemap.h5
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/shrinker.h1
-rw-r--r--include/linux/skbuff.h80
-rw-r--r--include/linux/sunrpc/xprt.h3
-rw-r--r--include/linux/swap.h14
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/vmstat.h5
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/net/sock.h40
-rw-r--r--include/trace/events/gfpflags.h1
30 files changed, 492 insertions, 74 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 489de625cd25..c97c6b9cd38e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -17,6 +17,7 @@
#include <linux/timer.h>
#include <linux/writeback.h>
#include <linux/atomic.h>
+#include <linux/sysctl.h>
struct page;
struct device;
@@ -304,6 +305,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct zone *zone, int sync, long timeout);
+int pdflush_proc_obsolete(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0edb65dd8edd..7b7ac9ccec7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,6 +160,7 @@ enum rq_flag_bits {
__REQ_FLUSH_SEQ, /* request for flush sequence */
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
+ __REQ_KERNEL, /* direct IO to kernel pages */
__REQ_NR_BITS, /* stops here */
};
@@ -201,5 +202,6 @@ enum rq_flag_bits {
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
+#define REQ_KERNEL (1 << __REQ_KERNEL)
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390ce98b2..dfae957398c3 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -31,7 +31,7 @@ SUBSYS(cpuacct)
/* */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
SUBSYS(mem_cgroup)
#endif
@@ -72,3 +72,9 @@ SUBSYS(net_prio)
#endif
/* */
+
+#ifdef CONFIG_CGROUP_HUGETLB
+SUBSYS(hugetlb)
+#endif
+
+/* */
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7f2d60..133ddcf83397 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -58,7 +58,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
if (++zone->compact_considered > defer_limit)
zone->compact_considered = defer_limit;
- return zone->compact_considered < (1UL << zone->compact_defer_shift);
+ return zone->compact_considered < defer_limit;
}
#else
@@ -85,7 +85,7 @@ static inline void defer_compaction(struct zone *zone, int order)
static inline bool compaction_deferred(struct zone *zone, int order)
{
- return 1;
+ return true;
}
#endif /* CONFIG_COMPACTION */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b178f9e91e23..d7eed5b98ae2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,6 +165,8 @@ struct inodes_stat_t {
#define READ 0
#define WRITE RW_MASK
#define READA RWA_MASK
+#define KERNEL_READ (READ|REQ_KERNEL)
+#define KERNEL_WRITE (WRITE|REQ_KERNEL)
#define READ_SYNC (READ | REQ_SYNC)
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
@@ -427,6 +429,7 @@ struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
+struct swap_info_struct;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -636,6 +639,11 @@ struct address_space_operations {
int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
unsigned long);
int (*error_remove_page)(struct address_space *, struct page *);
+
+ /* swapfile support */
+ int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
+ sector_t *span);
+ void (*swap_deactivate)(struct file *file);
};
extern const struct address_space_operations empty_aops;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1e49be49d324..4883f393f50a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -23,6 +23,7 @@ struct vm_area_struct;
#define ___GFP_REPEAT 0x400u
#define ___GFP_NOFAIL 0x800u
#define ___GFP_NORETRY 0x1000u
+#define ___GFP_MEMALLOC 0x2000u
#define ___GFP_COMP 0x4000u
#define ___GFP_ZERO 0x8000u
#define ___GFP_NOMEMALLOC 0x10000u
@@ -76,9 +77,14 @@ struct vm_area_struct;
#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */
#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */
#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */
+#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */
-#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves */
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
+ * This takes precedence over the
+ * __GFP_MEMALLOC flag if both are
+ * set
+ */
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
@@ -129,7 +135,7 @@ struct vm_area_struct;
/* Control page allocator reclaim behavior */
#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
- __GFP_NORETRY|__GFP_NOMEMALLOC)
+ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
/* Control slab gfp mask during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
@@ -379,6 +385,9 @@ void drain_local_pages(void *dummy);
*/
extern gfp_t gfp_allowed_mask;
+/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
+
extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 774fa47b3b5b..ef788b5b4a35 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -39,10 +39,17 @@ extern unsigned long totalhigh_pages;
void kmap_flush_unused(void);
+struct page *kmap_to_page(void *addr);
+
#else /* CONFIG_HIGHMEM */
static inline unsigned int nr_free_highpages(void) { return 0; }
+static inline struct page *kmap_to_page(void *addr)
+{
+ return virt_to_page(addr);
+}
+
#define totalhigh_pages 0UL
#ifndef ARCH_HAS_KMAP
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d5d6bbe2259e..225164842ab6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -4,9 +4,11 @@
#include <linux/mm_types.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
+#include <linux/cgroup.h>
struct ctl_table;
struct user_struct;
+struct mmu_gather;
#ifdef CONFIG_HUGETLB_PAGE
@@ -20,6 +22,11 @@ struct hugepage_subpool {
long max_hpages, used_hpages;
};
+extern spinlock_t hugetlb_lock;
+extern int hugetlb_max_hstate __read_mostly;
+#define for_each_hstate(h) \
+ for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)
+
struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
void hugepage_put_subpool(struct hugepage_subpool *spool);
@@ -40,9 +47,14 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, int *, int, unsigned int flags);
void unmap_hugepage_range(struct vm_area_struct *,
- unsigned long, unsigned long, struct page *);
-void __unmap_hugepage_range(struct vm_area_struct *,
- unsigned long, unsigned long, struct page *);
+ unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct page *ref_page);
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct page *ref_page);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(int, char *);
@@ -98,7 +110,6 @@ static inline unsigned long hugetlb_total_pages(void)
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end, page) BUG()
static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}
@@ -112,13 +123,31 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
-#define dequeue_hwpoisoned_huge_page(page) 0
+static inline int dequeue_hwpoisoned_huge_page(struct page *page)
+{
+ return 0;
+}
+
static inline void copy_huge_page(struct page *dst, struct page *src)
{
}
#define hugetlb_change_protection(vma, address, end, newprot)
+static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, struct page *ref_page)
+{
+ BUG();
+}
+
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, struct page *ref_page)
+{
+ BUG();
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#define HUGETLB_ANON_FILE "anon_hugepage"
@@ -199,10 +228,15 @@ struct hstate {
unsigned long resv_huge_pages;
unsigned long surplus_huge_pages;
unsigned long nr_overcommit_huge_pages;
+ struct list_head hugepage_activelist;
struct list_head hugepage_freelists[MAX_NUMNODES];
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+#ifdef CONFIG_CGROUP_HUGETLB
+ /* cgroup control files */
+ struct cftype cgroup_files[5];
+#endif
char name[HSTATE_NAME_LEN];
};
@@ -302,6 +336,11 @@ static inline unsigned hstate_index_to_shift(unsigned index)
return hstates[index].order + PAGE_SHIFT;
}
+static inline int hstate_index(struct hstate *h)
+{
+ return h - hstates;
+}
+
#else
struct hstate {};
#define alloc_huge_page_node(h, nid) NULL
@@ -320,6 +359,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
return 1;
}
#define hstate_index_to_shift(index) 0
+#define hstate_index(h) 0
#endif
#endif /* _LINUX_HUGETLB_H */
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
new file mode 100644
index 000000000000..d73878c694b3
--- /dev/null
+++ b/include/linux/hugetlb_cgroup.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#ifndef _LINUX_HUGETLB_CGROUP_H
+#define _LINUX_HUGETLB_CGROUP_H
+
+#include <linux/res_counter.h>
+
+struct hugetlb_cgroup;
+/*
+ * Minimum page order trackable by hugetlb cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ */
+#define HUGETLB_CGROUP_MIN_ORDER 2
+
+#ifdef CONFIG_CGROUP_HUGETLB
+
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+{
+ VM_BUG_ON(!PageHuge(page));
+
+ if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+ return NULL;
+ return (struct hugetlb_cgroup *)page[2].lru.next;
+}
+
+static inline
+int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+{
+ VM_BUG_ON(!PageHuge(page));
+
+ if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+ return -1;
+ page[2].lru.next = (void *)h_cg;
+ return 0;
+}
+
+static inline bool hugetlb_cgroup_disabled(void)
+{
+ if (hugetlb_subsys.disabled)
+ return true;
+ return false;
+}
+
+extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr);
+extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg,
+ struct page *page);
+extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+ struct page *page);
+extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg);
+extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_migrate(struct page *oldhpage,
+ struct page *newhpage);
+
+#else
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+{
+ return NULL;
+}
+
+static inline
+int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+{
+ return 0;
+}
+
+static inline bool hugetlb_cgroup_disabled(void)
+{
+ return true;
+}
+
+static inline int
+hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr)
+{
+ return 0;
+}
+
+static inline void
+hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg,
+ struct page *page)
+{
+ return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
+{
+ return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg)
+{
+ return;
+}
+
+static inline int __init hugetlb_cgroup_file_init(int idx)
+{
+ return 0;
+}
+
+static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
+ struct page *newhpage)
+{
+ return;
+}
+
+#endif /* CONFIG_MEM_RES_CTLR_HUGETLB */
+#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83e7ba90d6e5..8d9489fdab2e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
* (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
@@ -72,8 +72,6 @@ extern void mem_cgroup_uncharge_end(void);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
- int order);
bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
struct mem_cgroup *memcg);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
@@ -100,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
-extern int
-mem_cgroup_prepare_migration(struct page *page,
- struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
+extern void
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+ struct mem_cgroup **memcgp);
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok);
@@ -124,7 +122,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
extern void mem_cgroup_replace_page_cache(struct page *oldpage,
struct page *newpage);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif
@@ -182,7 +180,6 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -193,7 +190,7 @@ void mem_cgroup_split_huge_fixup(struct page *head);
bool mem_cgroup_bad_page_check(struct page *page);
void mem_cgroup_print_bad_page(struct page *page);
#endif
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_MEMCG */
struct mem_cgroup;
static inline int mem_cgroup_newpage_charge(struct page *page,
@@ -279,11 +276,10 @@ static inline struct cgroup_subsys_state
return NULL;
}
-static inline int
+static inline void
mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp, gfp_t gfp_mask)
+ struct mem_cgroup **memcgp)
{
- return 0;
}
static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
@@ -366,12 +362,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
return 0;
}
-static inline
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
-{
- return 0;
-}
-
static inline void mem_cgroup_split_huge_fixup(struct page *head)
{
}
@@ -384,9 +374,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
struct page *newpage)
{
}
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#endif /* CONFIG_MEMCG */
-#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
+#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
static inline bool
mem_cgroup_bad_page_check(struct page *page)
{
@@ -406,7 +396,7 @@ enum {
};
struct sock;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk);
#else
@@ -416,6 +406,6 @@ static inline void sock_update_memcg(struct sock *sk)
static inline void sock_release_memcg(struct sock *sk)
{
}
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 855c337b20c3..ce7e6671968b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -15,7 +15,7 @@ extern int migrate_page(struct address_space *,
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode);
-extern int migrate_huge_pages(struct list_head *l, new_page_t x,
+extern int migrate_huge_page(struct page *, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode);
@@ -36,7 +36,7 @@ static inline void putback_lru_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
-static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
+static inline int migrate_huge_page(struct page *page, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9f279cf5b1b..bd079a1b0fdc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -805,6 +805,17 @@ static inline void *page_rmapping(struct page *page)
return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
}
+extern struct address_space *__page_file_mapping(struct page *);
+
+static inline
+struct address_space *page_file_mapping(struct page *page)
+{
+ if (unlikely(PageSwapCache(page)))
+ return __page_file_mapping(page);
+
+ return page->mapping;
+}
+
static inline int PageAnon(struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
@@ -821,6 +832,20 @@ static inline pgoff_t page_index(struct page *page)
return page->index;
}
+extern pgoff_t __page_file_index(struct page *page);
+
+/*
+ * Return the file index of the page. Regular pagecache pages use ->index
+ * whereas swapcache pages use swp_offset(->private)
+ */
+static inline pgoff_t page_file_index(struct page *page)
+{
+ if (unlikely(PageSwapCache(page)))
+ return __page_file_index(page);
+
+ return page->index;
+}
+
/*
* Return true if this page is mapped into pagetables.
*/
@@ -994,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct page **pages, struct vm_area_struct **vmas);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+struct kvec;
+int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
+ struct page **pages);
+int get_kernel_page(unsigned long start, int write, struct page **pages);
struct page *get_dump_page(unsigned long addr);
extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
@@ -1331,6 +1360,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_reset(struct zone *zone);
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
@@ -1528,6 +1558,7 @@ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
static inline void vm_stat_account(struct mm_struct *mm,
unsigned long flags, struct file *file, long pages)
{
+ mm->total_vm += pages;
}
#endif /* CONFIG_PROC_FS */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 074eb98fe15d..bf7867200b95 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,15 @@ struct page {
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* slub/slob first free object */
+ bool pfmemalloc; /* If set by the page allocator,
+ * ALLOC_NO_WATERMARKS was set
+ * and the low watermark was not
+ * met implying that the system
+ * is under some pressure. The
+ * caller should try ensure
+ * this page is only used to
+ * free other pages.
+ */
};
union {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 458988bd55a1..2daa54f55db7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -201,7 +201,7 @@ struct zone_reclaim_stat {
struct lruvec {
struct list_head lists[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
struct zone *zone;
#endif
};
@@ -209,7 +209,6 @@ struct lruvec {
/* Mask used at gathering information at once (see memcontrol.c) */
#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
-#define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON)
#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
/* Isolate clean file */
@@ -369,6 +368,10 @@ struct zone {
*/
spinlock_t lock;
int all_unreclaimable; /* All pages pinned */
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ /* pfn where the last incremental compaction isolated free pages */
+ unsigned long compact_cached_free_pfn;
+#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
@@ -475,6 +478,14 @@ struct zone {
* rarely used fields:
*/
const char *name;
+#ifdef CONFIG_MEMORY_ISOLATION
+ /*
+ * the number of MIGRATE_ISOLATE *pageblock*.
+ * We need this for free page counting. Look at zone_watermark_ok_safe.
+ * It's protected by zone->lock
+ */
+ int nr_pageblock_isolate;
+#endif
} ____cacheline_internodealigned_in_smp;
typedef enum {
@@ -671,7 +682,7 @@ typedef struct pglist_data {
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
struct page_cgroup *node_page_cgroup;
#endif
#endif
@@ -694,6 +705,7 @@ typedef struct pglist_data {
range, including holes */
int node_id;
wait_queue_head_t kswapd_wait;
+ wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
int kswapd_max_order;
enum zone_type classzone_idx;
@@ -718,7 +730,7 @@ typedef struct pglist_data {
#include <linux/memory_hotplug.h>
extern struct mutex zonelists_mutex;
-void build_all_zonelists(void *data);
+void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
@@ -736,7 +748,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);
static inline struct zone *lruvec_zone(struct lruvec *lruvec)
{
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
return lruvec->zone;
#else
return container_of(lruvec, struct zone, lruvec);
@@ -773,7 +785,7 @@ extern int movable_zone;
static inline int zone_movable_is_highmem(void)
{
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE)
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
return movable_zone == ZONE_HIGHMEM;
#else
return 0;
@@ -1052,7 +1064,7 @@ struct mem_section {
/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
/*
* If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
* section. (see memcontrol.h/page_cgroup.h about this.)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2889877318bc..1f8fc7f9bcd8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -473,10 +473,10 @@ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
const struct iovec *iov, unsigned long nr_segs,
- loff_t pos);
+ loff_t pos, bool uio);
extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
const struct iovec *iov, unsigned long nr_segs,
- loff_t pos);
+ loff_t pos, bool uio);
/*
* linux/fs/nfs/dir.c
diff --git a/include/linux/oom.h b/include/linux/oom.h
index e4c29bc72e70..49a3031fda50 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,15 +40,36 @@ enum oom_constraint {
CONSTRAINT_MEMCG,
};
+enum oom_scan_t {
+ OOM_SCAN_OK, /* scan thread and find its badness */
+ OOM_SCAN_CONTINUE, /* do not consider thread for oom kill */
+ OOM_SCAN_ABORT, /* abort the iteration and return */
+ OOM_SCAN_SELECT, /* always select this thread first */
+};
+
extern void compare_swap_oom_score_adj(int old_val, int new_val);
extern int test_set_oom_score_adj(int new_val);
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
+extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+ unsigned int points, unsigned long totalpages,
+ struct mem_cgroup *memcg, nodemask_t *nodemask,
+ const char *message);
+
extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
+ int order, const nodemask_t *nodemask);
+
+extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
+ unsigned long totalpages, const nodemask_t *nodemask,
+ bool force_kill);
+extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ int order);
+
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
int order, nodemask_t *mask, bool force_kill);
extern int register_oom_notifier(struct notifier_block *nb);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9451af..b5d13841604e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/bug.h>
+#include <linux/mmdebug.h>
#ifndef __GENERATING_BOUNDS_H
#include <linux/mm_types.h>
#include <generated/bounds.h>
@@ -453,6 +454,34 @@ static inline int PageTransTail(struct page *page)
}
#endif
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline int PageSlabPfmemalloc(struct page *page)
+{
+ VM_BUG_ON(!PageSlab(page));
+ return PageActive(page);
+}
+
+static inline void SetPageSlabPfmemalloc(struct page *page)
+{
+ VM_BUG_ON(!PageSlab(page));
+ SetPageActive(page);
+}
+
+static inline void __ClearPageSlabPfmemalloc(struct page *page)
+{
+ VM_BUG_ON(!PageSlab(page));
+ __ClearPageActive(page);
+}
+
+static inline void ClearPageSlabPfmemalloc(struct page *page)
+{
+ VM_BUG_ON(!PageSlab(page));
+ ClearPageActive(page);
+}
+
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1 << PG_mlocked)
#else
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3bdcab30ca41..105077aa7685 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -1,6 +1,11 @@
#ifndef __LINUX_PAGEISOLATION_H
#define __LINUX_PAGEISOLATION_H
+
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
+void set_pageblock_migratetype(struct page *page, int migratetype);
+int move_freepages_block(struct zone *zone, struct page *page,
+ int migratetype);
/*
* Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
* If specified range includes migrate types other than MOVABLE or CMA,
@@ -10,7 +15,7 @@
* free all pages in the range. test_page_isolated() can be used for
* test it.
*/
-extern int
+int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype);
@@ -18,7 +23,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
* target range is [start_pfn, end_pfn)
*/
-extern int
+int
undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype);
@@ -30,8 +35,8 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
/*
* Internal functions. Changes pageblock's migrate type.
*/
-extern int set_migratetype_isolate(struct page *page);
-extern void unset_migratetype_isolate(struct page *page, unsigned migratetype);
+int set_migratetype_isolate(struct page *page);
+void unset_migratetype_isolate(struct page *page, unsigned migratetype);
#endif
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index a88cdba27809..777a524716db 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -12,7 +12,7 @@ enum {
#ifndef __GENERATING_BOUNDS_H
#include <generated/bounds.h>
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
#include <linux/bit_spinlock.h>
/*
@@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
bit_spin_unlock(PCG_LOCK, &pc->flags);
}
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_MEMCG */
struct page_cgroup;
static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
@@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void)
{
}
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#endif /* CONFIG_MEMCG */
#include <linux/swap.h>
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
unsigned short old, unsigned short new);
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
@@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type)
return;
}
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+#endif /* CONFIG_MEMCG_SWAP */
#endif /* !__GENERATING_BOUNDS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7cfad3bbb0cc..e42c762f0dc7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -286,6 +286,11 @@ static inline loff_t page_offset(struct page *page)
return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
}
+static inline loff_t page_file_offset(struct page *page)
+{
+ return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68dcffaa62a0..c147e7024f11 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1584,7 +1584,7 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
+#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
struct memcg_batch_info {
int do_batch; /* incremented when batch uncharge started */
struct mem_cgroup *memcg; /* target memcg of uncharge */
@@ -1894,6 +1894,13 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif
+static inline void tsk_restore_flags(struct task_struct *task,
+ unsigned long orig_flags, unsigned long flags)
+{
+ task->flags &= ~flags;
+ task->flags |= orig_flags & flags;
+}
+
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask);
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 07ceb97d53fa..ac6b8ee07825 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -20,7 +20,6 @@ struct shrink_control {
* 'nr_to_scan' entries and attempt to free them up. It should return
* the number of objects which remain in the cache. If it returns -1, it means
* it cannot do any scanning at this time (eg. there is a risk of deadlock).
- * The callback must not return -1 if nr_to_scan is zero.
*
* The 'gfpmask' refers to the allocation we are currently trying to
* fulfil.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d205c4be7f5b..7632c87da2c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -462,6 +462,7 @@ struct sk_buff {
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
+ __u8 pfmemalloc:1;
__u8 ooo_okay:1;
__u8 l4_rxhash:1;
__u8 wifi_acked_valid:1;
@@ -502,6 +503,15 @@ struct sk_buff {
#include <linux/slab.h>
+#define SKB_ALLOC_FCLONE 0x01
+#define SKB_ALLOC_RX 0x02
+
+/* Returns true if the skb was allocated from PFMEMALLOC reserves */
+static inline bool skb_pfmemalloc(const struct sk_buff *skb)
+{
+ return unlikely(skb->pfmemalloc);
+}
+
/*
* skb might have a dst pointer attached, refcounted or not.
* _skb_refdst low order bit is set if refcount was _not_ taken
@@ -565,7 +575,7 @@ extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize);
extern struct sk_buff *__alloc_skb(unsigned int size,
- gfp_t priority, int fclone, int node);
+ gfp_t priority, int flags, int node);
extern struct sk_buff *build_skb(void *data, unsigned int frag_size);
static inline struct sk_buff *alloc_skb(unsigned int size,
gfp_t priority)
@@ -576,7 +586,7 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
gfp_t priority)
{
- return __alloc_skb(size, priority, 1, NUMA_NO_NODE);
+ return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
}
extern void skb_recycle(struct sk_buff *skb);
@@ -1237,6 +1247,17 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
{
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ /*
+ * Propagate page->pfmemalloc to the skb if we can. The problem is
+ * that not all callers have unique ownership of the page. If
+ * pfmemalloc is set, we check the mapping as a mapping implies
+ * page->index is set (index and pfmemalloc share space).
+ * If it's a valid mapping, we cannot use page->pfmemalloc but we
+ * do not lose pfmemalloc information as the pages would not be
+ * allocated using __GFP_MEMALLOC.
+ */
+ if (page->pfmemalloc && !page->mapping)
+ skb->pfmemalloc = true;
frag->page.p = page;
frag->page_offset = off;
skb_frag_size_set(frag, size);
@@ -1753,6 +1774,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
}
+/*
+ * __skb_alloc_page - allocate pages for ps-rx on a skb and preserve pfmemalloc data
+ * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ * @order: size of the allocation
+ *
+ * Allocate a new page.
+ *
+ * %NULL is returned if there is no free memory.
+*/
+static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
+ struct sk_buff *skb,
+ unsigned int order)
+{
+ struct page *page;
+
+ gfp_mask |= __GFP_COLD;
+
+ if (!(gfp_mask & __GFP_NOMEMALLOC))
+ gfp_mask |= __GFP_MEMALLOC;
+
+ page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+ if (skb && page && page->pfmemalloc)
+ skb->pfmemalloc = true;
+
+ return page;
+}
+
+/**
+ * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
+ * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ *
+ * Allocate a new page.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
+ struct sk_buff *skb)
+{
+ return __skb_alloc_pages(gfp_mask, skb, 0);
+}
+
+/**
+ * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
+ * @page: The page that was allocated from skb_alloc_page
+ * @skb: The skb that may need pfmemalloc set
+ */
+static inline void skb_propagate_pfmemalloc(struct page *page,
+ struct sk_buff *skb)
+{
+ if (page && page->pfmemalloc)
+ skb->pfmemalloc = true;
+}
+
/**
* skb_frag_page - retrieve the page refered to by a paged fragment
* @frag: the paged fragment
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 77d278defa70..cff40aa7db62 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -174,6 +174,8 @@ struct rpc_xprt {
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
resvport : 1; /* use a reserved port */
+ unsigned int swapper; /* we're swapping over this
+ transport */
unsigned int bind_index; /* bind function index */
/*
@@ -316,6 +318,7 @@ void xprt_release_rqst_cong(struct rpc_task *task);
void xprt_disconnect_done(struct rpc_xprt *xprt);
void xprt_force_disconnect(struct rpc_xprt *xprt);
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
+int xs_swapper(struct rpc_xprt *xprt, int enable);
/*
* Reserved bit positions in xprt->state
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c84ec68eaec9..388e70601413 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -151,6 +151,7 @@ enum {
SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
SWP_BLKDEV = (1 << 6), /* its a block device */
+ SWP_FILE = (1 << 7), /* set after swap_activate success */
/* add others here before... */
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};
@@ -301,7 +302,7 @@ static inline void scan_unevictable_unregister_node(struct node *node)
extern int kswapd_run(int nid);
extern void kswapd_stop(int nid);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
#else
static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
@@ -309,7 +310,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
return vm_swappiness;
}
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
#else
static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
@@ -320,8 +321,14 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
/* linux/mm/page_io.c */
extern int swap_readpage(struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
+extern int swap_set_page_dirty(struct page *page);
extern void end_swap_bio_read(struct bio *bio, int err);
+int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
+ unsigned long nr_pages, sector_t start_block);
+int generic_swapfile_activate(struct swap_info_struct *, struct file *,
+ sector_t *);
+
/* linux/mm/swap_state.c */
extern struct address_space swapper_space;
#define total_swapcache_pages swapper_space.nrpages
@@ -356,11 +363,12 @@ extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
+extern struct swap_info_struct *page_swap_info(struct page *);
extern int reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
extern void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
#else
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 06f8e3858251..57f7b1091511 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -30,6 +30,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGSTEAL_DIRECT),
FOR_ALL_ZONES(PGSCAN_KSWAPD),
FOR_ALL_ZONES(PGSCAN_DIRECT),
+ PGSCAN_DIRECT_THROTTLE,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 65efb92da996..ad2cfd53dadc 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -179,11 +179,6 @@ extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
-static inline void zap_zone_vm_stats(struct zone *zone)
-{
- memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
-}
-
extern void inc_zone_state(struct zone *, enum zone_stat_item);
#ifdef CONFIG_SMP
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 6d0a0fcd80e7..c66fe3332d83 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -189,9 +189,4 @@ void tag_pages_for_writeback(struct address_space *mapping,
void account_page_redirty(struct page *page);
-/* pdflush.c */
-extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
- read-only. */
-
-
#endif /* WRITEBACK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index e067f8c18f88..b3730239bf18 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -621,6 +621,7 @@ enum sock_flags {
SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+ SOCK_MEMALLOC, /* VM depends on this socket for swapping */
SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */
SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */
SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */
@@ -658,6 +659,26 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
return test_bit(flag, &sk->sk_flags);
}
+#ifdef CONFIG_NET
+extern struct static_key memalloc_socks;
+static inline int sk_memalloc_socks(void)
+{
+ return static_key_false(&memalloc_socks);
+}
+#else
+
+static inline int sk_memalloc_socks(void)
+{
+ return 0;
+}
+
+#endif
+
+static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
+{
+ return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
+}
+
static inline void sk_acceptq_removed(struct sock *sk)
{
sk->sk_ack_backlog--;
@@ -733,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
return 0;
}
+extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+
static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+ return __sk_backlog_rcv(sk, skb);
+
return sk->sk_backlog_rcv(sk, skb);
}
@@ -798,6 +824,8 @@ extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);
+extern void sk_set_memalloc(struct sock *sk);
+extern void sk_clear_memalloc(struct sock *sk);
extern int sk_wait_data(struct sock *sk, long *timeo);
@@ -913,7 +941,7 @@ struct proto {
#ifdef SOCK_REFCNT_DEBUG
atomic_t socks;
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
/*
* cgroup specific init/deinit functions. Called once for all
* protocols that implement it, from cgroups populate function.
@@ -994,7 +1022,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */
-#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET)
+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
extern struct static_key memcg_socket_limit_enabled;
static inline struct cg_proto *parent_cg_proto(struct proto *proto,
struct cg_proto *cg_proto)
@@ -1301,12 +1329,14 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
__sk_mem_schedule(sk, size, SK_MEM_SEND);
}
-static inline bool sk_rmem_schedule(struct sock *sk, int size)
+static inline bool
+sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size)
{
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_RECV);
+ return size<= sk->sk_forward_alloc ||
+ __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+ skb_pfmemalloc(skb);
}
static inline void sk_mem_reclaim(struct sock *sk)
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 9fe3a36646e9..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -30,6 +30,7 @@
{(unsigned long)__GFP_COMP, "GFP_COMP"}, \
{(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \
{(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \
+ {(unsigned long)__GFP_MEMALLOC, "GFP_MEMALLOC"}, \
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \