summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-15 11:41:44 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-15 11:41:44 -0800
commit875fc4f5ddf35605581f9a5900c14afef48611f2 (patch)
treee237a28a71a5d1e72eaf0ecda737eb5c8614c72c /include
parent7d1fc01afc5af35e5197e0e75abe900f6bd279b8 (diff)
parent7dfa4612204b511c934ca2a0e4f306f9981bd9aa (diff)
downloadlinux-875fc4f5ddf35605581f9a5900c14afef48611f2.tar.gz
linux-875fc4f5ddf35605581f9a5900c14afef48611f2.tar.bz2
linux-875fc4f5ddf35605581f9a5900c14afef48611f2.zip
Merge branch 'akpm' (patches from Andrew)
Merge first patch-bomb from Andrew Morton: - A few hotfixes which missed 4.4 becasue I was asleep. cc'ed to -stable - A few misc fixes - OCFS2 updates - Part of MM. Including pretty large changes to page-flags handling and to thp management which have been buffered up for 2-3 cycles now. I have a lot of MM material this time. [ It turns out the THP part wasn't quite ready, so that got dropped from this series - Linus ] * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits) zsmalloc: reorganize struct size_class to pack 4 bytes hole mm/zbud.c: use list_last_entry() instead of list_tail_entry() zram/zcomp: do not zero out zcomp private pages zram: pass gfp from zcomp frontend to backend zram: try vmalloc() after kmalloc() zram/zcomp: use GFP_NOIO to allocate streams mm: add tracepoint for scanning pages drivers/base/memory.c: fix kernel warning during memory hotplug on ppc64 mm/page_isolation: use macro to judge the alignment mm: fix noisy sparse warning in LIBCFS_ALLOC_PRE() mm: rework virtual memory accounting include/linux/memblock.h: fix ordering of 'flags' argument in comments mm: move lru_to_page to mm_inline.h Documentation/filesystems: describe the shared memory usage/accounting memory-hotplug: don't BUG() in register_memory_resource() hugetlb: make mm and fs code explicitly non-modular mm/swapfile.c: use list_for_each_entry_safe in free_swap_count_continuations mm: /proc/pid/clear_refs: no need to clear VM_SOFTDIRTY in clear_soft_dirty_pmd() mm: make sure isolate_lru_page() is never called for tail page vmstat: make vmstat_updater deferrable again and shut down on idle ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/memory_model.h4
-rw-r--r--include/linux/dcache.h4
-rw-r--r--include/linux/fsnotify_backend.h5
-rw-r--r--include/linux/gfp.h22
-rw-r--r--include/linux/hugetlb.h10
-rw-r--r--include/linux/memblock.h13
-rw-r--r--include/linux/memcontrol.h102
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mm.h46
-rw-r--r--include/linux/mm_inline.h2
-rw-r--r--include/linux/mm_types.h9
-rw-r--r--include/linux/mmzone.h37
-rw-r--r--include/linux/pfn.h1
-rw-r--r--include/linux/shmem_fs.h4
-rw-r--r--include/linux/slab.h5
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/thread_info.h5
-rw-r--r--include/linux/vmalloc.h1
-rw-r--r--include/linux/vmpressure.h7
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--include/net/sock.h138
-rw-r--r--include/net/tcp.h5
-rw-r--r--include/net/tcp_memcontrol.h1
-rw-r--r--include/trace/events/huge_memory.h136
-rw-r--r--include/trace/events/page_isolation.h38
-rw-r--r--include/trace/events/vmscan.h21
26 files changed, 371 insertions, 250 deletions
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 4b4b056a6eb0..5148150cc80b 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -1,6 +1,8 @@
#ifndef __ASM_MEMORY_MODEL_H
#define __ASM_MEMORY_MODEL_H
+#include <linux/pfn.h>
+
#ifndef __ASSEMBLY__
#if defined(CONFIG_FLATMEM)
@@ -72,7 +74,7 @@
/*
* Convert a physical address to a Page Frame Number and back
*/
-#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __phys_to_pfn(paddr) PHYS_PFN(paddr)
#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
#define page_to_pfn __page_to_pfn
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d67ae119cf4e..7781ce110503 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -27,10 +27,10 @@ struct vfsmount;
/* The hash is always the low bits of hash_len */
#ifdef __LITTLE_ENDIAN
- #define HASH_LEN_DECLARE u32 hash; u32 len;
+ #define HASH_LEN_DECLARE u32 hash; u32 len
#define bytemask_from_count(cnt) (~(~0ul << (cnt)*8))
#else
- #define HASH_LEN_DECLARE u32 len; u32 hash;
+ #define HASH_LEN_DECLARE u32 len; u32 hash
#define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8))
#endif
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 533c4408529a..6b7e89f45aa4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -220,7 +220,10 @@ struct fsnotify_mark {
/* List of marks by group->i_fsnotify_marks. Also reused for queueing
* mark into destroy_list when it's waiting for the end of SRCU period
* before it can be freed. [group->mark_mutex] */
- struct list_head g_list;
+ union {
+ struct list_head g_list;
+ struct rcu_head g_rcu;
+ };
/* Protects inode / mnt pointers, flags, masks */
spinlock_t lock;
/* List of marks for inode / vfsmount [obj_lock] */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8942af0813e3..28ad5f6494b0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,7 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_ATOMIC 0x80000u
-#define ___GFP_NOACCOUNT 0x100000u
+#define ___GFP_ACCOUNT 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_DIRECT_RECLAIM 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -73,11 +73,15 @@ struct vm_area_struct;
*
* __GFP_THISNODE forces the allocation to be satisified from the requested
* node with no fallbacks or placement policy enforcements.
+ *
+ * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant
+ * to kmem allocations).
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
+#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
/*
* Watermark modifiers -- controls access to emergency reserves
@@ -104,7 +108,6 @@ struct vm_area_struct;
#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
-#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT)
/*
* Reclaim modifiers
@@ -197,6 +200,9 @@ struct vm_area_struct;
* GFP_KERNEL is typical for kernel-internal allocations. The caller requires
* ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
*
+ * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
* GFP_NOWAIT is for kernel allocations that should not stall for direct
* reclaim, start physical IO or use any filesystem callback.
*
@@ -236,6 +242,7 @@ struct vm_area_struct;
*/
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
#define GFP_NOIO (__GFP_RECLAIM)
#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
@@ -271,7 +278,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
- return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
+ return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}
#ifdef CONFIG_HIGHMEM
@@ -377,10 +384,11 @@ static inline enum zone_type gfp_zone(gfp_t flags)
static inline int gfp_zonelist(gfp_t flags)
{
- if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE))
- return 1;
-
- return 0;
+#ifdef CONFIG_NUMA
+ if (unlikely(flags & __GFP_THISNODE))
+ return ZONELIST_NOFALLBACK;
+#endif
+ return ZONELIST_FALLBACK;
}
/*
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b0eb06423d5e..e76574d8f9b5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -263,20 +263,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct user_struct **user, int creat_flags,
int page_size_log);
-static inline int is_file_hugepages(struct file *file)
+static inline bool is_file_hugepages(struct file *file)
{
if (file->f_op == &hugetlbfs_file_operations)
- return 1;
- if (is_file_shm_hugepages(file))
- return 1;
+ return true;
- return 0;
+ return is_file_shm_hugepages(file);
}
#else /* !CONFIG_HUGETLBFS */
-#define is_file_hugepages(file) 0
+#define is_file_hugepages(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
struct user_struct **user, int creat_flags,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index fec66f86eeff..173fb44e22f1 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -216,10 +216,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
* for_each_free_mem_range - iterate through free memblock areas
* @i: u64 used as loop variable
* @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
* @p_nid: ptr to int for nid of the range, can be %NULL
- * @flags: pick from blocks based on memory attributes
*
* Walks over free (memory && !reserved) areas of memblock. Available as
* soon as memblock is initialized.
@@ -232,10 +232,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
* for_each_free_mem_range_reverse - rev-iterate through free memblock areas
* @i: u64 used as loop variable
* @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
* @p_nid: ptr to int for nid of the range, can be %NULL
- * @flags: pick from blocks based on memory attributes
*
* Walks over free (memory && !reserved) areas of memblock in reverse
* order. Available as soon as memblock is initialized.
@@ -325,10 +325,10 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
-int memblock_is_memory(phys_addr_t addr);
+bool memblock_is_memory(phys_addr_t addr);
int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
-int memblock_is_reserved(phys_addr_t addr);
+bool memblock_is_reserved(phys_addr_t addr);
bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
extern void __memblock_dump_all(void);
@@ -399,6 +399,11 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \
region++)
+#define for_each_memblock_type(memblock_type, rgn) \
+ idx = 0; \
+ rgn = &memblock_type->regions[idx]; \
+ for (idx = 0; idx < memblock_type->cnt; \
+ idx++,rgn = &memblock_type->regions[idx])
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
#define __init_memblock __meminit
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index cd0e2413c358..2292468f2a30 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,32 +85,10 @@ enum mem_cgroup_events_target {
MEM_CGROUP_NTARGETS,
};
-/*
- * Bits in struct cg_proto.flags
- */
-enum cg_proto_flags {
- /* Currently active and new sockets should be assigned to cgroups */
- MEMCG_SOCK_ACTIVE,
- /* It was ever activated; we must disarm static keys on destruction */
- MEMCG_SOCK_ACTIVATED,
-};
-
struct cg_proto {
struct page_counter memory_allocated; /* Current allocated memory. */
- struct percpu_counter sockets_allocated; /* Current number of sockets. */
int memory_pressure;
- long sysctl_mem[3];
- unsigned long flags;
- /*
- * memcg field is used to find which memcg we belong directly
- * Each memcg struct can hold more than one cg_proto, so container_of
- * won't really cut.
- *
- * The elegant solution would be having an inverse function to
- * proto_cgroup in struct proto, but that means polluting the structure
- * for everybody, instead of just for memcg users.
- */
- struct mem_cgroup *memcg;
+ bool active;
};
#ifdef CONFIG_MEMCG
@@ -192,6 +170,9 @@ struct mem_cgroup {
unsigned long low;
unsigned long high;
+ /* Range enforcement for interrupt charges */
+ struct work_struct high_work;
+
unsigned long soft_limit;
/* vmpressure notifications */
@@ -268,6 +249,10 @@ struct mem_cgroup {
struct wb_domain cgwb_domain;
#endif
+#ifdef CONFIG_INET
+ unsigned long socket_pressure;
+#endif
+
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
@@ -275,7 +260,8 @@ struct mem_cgroup {
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
-extern struct cgroup_subsys_state *mem_cgroup_root_css;
+
+extern struct mem_cgroup *root_mem_cgroup;
/**
* mem_cgroup_events - count memory events against a cgroup
@@ -308,18 +294,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
}
+#define mem_cgroup_from_counter(counter, member) \
+ container_of(counter, struct mem_cgroup, member)
+
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+/**
+ * parent_mem_cgroup - find the accounting parent of a memcg
+ * @memcg: memcg whose parent to find
+ *
+ * Returns the parent memcg, or NULL if this is the root or the memory
+ * controller is in legacy no-hierarchy mode.
+ */
+static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+{
+ if (!memcg->memory.parent)
+ return NULL;
+ return mem_cgroup_from_counter(memcg->memory.parent, memory);
+}
+
static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
struct mem_cgroup *root)
{
@@ -671,12 +673,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
}
#endif /* CONFIG_MEMCG */
-enum {
- UNDER_LIMIT,
- SOFT_LIMIT,
- OVER_LIMIT,
-};
-
#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
@@ -703,20 +699,35 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
#endif /* CONFIG_CGROUP_WRITEBACK */
struct sock;
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk);
-#else
-static inline void sock_update_memcg(struct sock *sk)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
+extern struct static_key_false memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
+#ifdef CONFIG_MEMCG_KMEM
+ if (memcg->tcp_mem.memory_pressure)
+ return true;
+#endif
+ do {
+ if (time_before(jiffies, memcg->socket_pressure))
+ return true;
+ } while ((memcg = parent_mem_cgroup(memcg)));
+ return false;
}
-static inline void sock_release_memcg(struct sock *sk)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
+ return false;
}
-#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+#endif
#ifdef CONFIG_MEMCG_KMEM
-extern struct static_key memcg_kmem_enabled_key;
+extern struct static_key_false memcg_kmem_enabled_key;
extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
@@ -732,7 +743,7 @@ void memcg_put_cache_ids(void);
static inline bool memcg_kmem_enabled(void)
{
- return static_key_false(&memcg_kmem_enabled_key);
+ return static_branch_unlikely(&memcg_kmem_enabled_key);
}
static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
@@ -766,15 +777,13 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
return memcg ? memcg->kmemcg_id : -1;
}
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
void __memcg_kmem_put_cache(struct kmem_cache *cachep);
-static inline bool __memcg_kmem_bypass(gfp_t gfp)
+static inline bool __memcg_kmem_bypass(void)
{
if (!memcg_kmem_enabled())
return true;
- if (gfp & __GFP_NOACCOUNT)
- return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
return false;
@@ -791,7 +800,9 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp)
static __always_inline int memcg_kmem_charge(struct page *page,
gfp_t gfp, int order)
{
- if (__memcg_kmem_bypass(gfp))
+ if (__memcg_kmem_bypass())
+ return 0;
+ if (!(gfp & __GFP_ACCOUNT))
return 0;
return __memcg_kmem_charge(page, gfp, order);
}
@@ -810,16 +821,15 @@ static __always_inline void memcg_kmem_uncharge(struct page *page, int order)
/**
* memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
* @cachep: the original global kmem cache
- * @gfp: allocation flags.
*
* All memory allocated from a per-memcg cache is charged to the owner memcg.
*/
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
- if (__memcg_kmem_bypass(gfp))
+ if (__memcg_kmem_bypass())
return cachep;
- return __memcg_kmem_get_cache(cachep);
+ return __memcg_kmem_get_cache(cachep, gfp);
}
static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3d385c81c153..2696c1f05ed1 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -122,7 +122,7 @@ struct sp_node {
struct shared_policy {
struct rb_root root;
- spinlock_t lock;
+ rwlock_t lock;
};
int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..839d9e9a1c38 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -51,6 +51,17 @@ extern int sysctl_legacy_va_layout;
#define sysctl_legacy_va_layout 0
#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+extern const int mmap_rnd_bits_min;
+extern const int mmap_rnd_bits_max;
+extern int mmap_rnd_bits __read_mostly;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+extern const int mmap_rnd_compat_bits_min;
+extern const int mmap_rnd_compat_bits_max;
+extern int mmap_rnd_compat_bits __read_mostly;
+#endif
+
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -225,10 +236,14 @@ extern pgprot_t protection_map[16];
* ->fault function. The vma's ->fault is responsible for returning a bitmask
* of VM_FAULT_xxx flags that give details about how the fault was handled.
*
+ * MM layer fills up gfp_mask for page allocations but fault handler might
+ * alter it if its implementation requires a different allocation context.
+ *
* pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
unsigned int flags; /* FAULT_FLAG_xxx flags */
+ gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
void __user *virtual_address; /* Faulting virtual address */
@@ -1361,10 +1376,26 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member)
atomic_long_dec(&mm->rss_stat.count[member]);
}
+/* Optimized variant when page is already known not to be PageAnon */
+static inline int mm_counter_file(struct page *page)
+{
+ if (PageSwapBacked(page))
+ return MM_SHMEMPAGES;
+ return MM_FILEPAGES;
+}
+
+static inline int mm_counter(struct page *page)
+{
+ if (PageAnon(page))
+ return MM_ANONPAGES;
+ return mm_counter_file(page);
+}
+
static inline unsigned long get_mm_rss(struct mm_struct *mm)
{
return get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_ANONPAGES);
+ get_mm_counter(mm, MM_ANONPAGES) +
+ get_mm_counter(mm, MM_SHMEMPAGES);
}
static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
@@ -1898,7 +1929,9 @@ extern void mm_drop_all_locks(struct mm_struct *mm);
extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
extern struct file *get_mm_exe_file(struct mm_struct *mm);
-extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
+extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
+
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags,
@@ -2116,15 +2149,6 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
-#ifdef CONFIG_PROC_FS
-void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
-#else
-static inline void vm_stat_account(struct mm_struct *mm,
- unsigned long flags, struct file *file, long pages)
-{
- mm->total_vm += pages;
-}
-#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_DEBUG_PAGEALLOC
extern bool _debug_pagealloc_enabled;
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index cf55945c83fb..712e8c37a200 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -100,4 +100,6 @@ static __always_inline enum lru_list page_lru(struct page *page)
return lru;
}
+#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f8d1492a114f..6bc9a0ce2253 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -369,9 +369,10 @@ struct core_state {
};
enum {
- MM_FILEPAGES,
- MM_ANONPAGES,
- MM_SWAPENTS,
+ MM_FILEPAGES, /* Resident file mapping pages */
+ MM_ANONPAGES, /* Resident anonymous pages */
+ MM_SWAPENTS, /* Anonymous swap entries */
+ MM_SHMEMPAGES, /* Resident shared memory pages */
NR_MM_COUNTERS
};
@@ -426,7 +427,7 @@ struct mm_struct {
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
unsigned long pinned_vm; /* Refcount permanently increased */
- unsigned long shared_vm; /* Shared pages (files) */
+ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
unsigned long def_flags;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e23a9e704536..33bb1b19273e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -195,11 +195,6 @@ static inline int is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
-static inline int is_unevictable_lru(enum lru_list lru)
-{
- return (lru == LRU_UNEVICTABLE);
-}
-
struct zone_reclaim_stat {
/*
* The pageout code in vmscan.c keeps track of how many of the
@@ -361,10 +356,10 @@ struct zone {
struct per_cpu_pageset __percpu *pageset;
/*
- * This is a per-zone reserve of pages that should not be
- * considered dirtyable memory.
+ * This is a per-zone reserve of pages that are not available
+ * to userspace allocations.
*/
- unsigned long dirty_balance_reserve;
+ unsigned long totalreserve_pages;
#ifndef CONFIG_SPARSEMEM
/*
@@ -576,19 +571,17 @@ static inline bool zone_is_empty(struct zone *zone)
/* Maximum number of zones on a zonelist */
#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
+enum {
+ ZONELIST_FALLBACK, /* zonelist with fallback */
#ifdef CONFIG_NUMA
-
-/*
- * The NUMA zonelists are doubled because we need zonelists that restrict the
- * allocations to a single node for __GFP_THISNODE.
- *
- * [0] : Zonelist with fallback
- * [1] : No fallback (__GFP_THISNODE)
- */
-#define MAX_ZONELISTS 2
-#else
-#define MAX_ZONELISTS 1
+ /*
+ * The NUMA zonelists are doubled because we need zonelists that
+ * restrict the allocations to a single node for __GFP_THISNODE.
+ */
+ ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */
#endif
+ MAX_ZONELISTS
+};
/*
* This struct contains information about a zone in a zonelist. It is stored
@@ -1207,13 +1200,13 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
* the zone and PFN linkages are still valid. This is expensive, but walkers
* of the full memmap are extremely rare.
*/
-int memmap_valid_within(unsigned long pfn,
+bool memmap_valid_within(unsigned long pfn,
struct page *page, struct zone *zone);
#else
-static inline int memmap_valid_within(unsigned long pfn,
+static inline bool memmap_valid_within(unsigned long pfn,
struct page *page, struct zone *zone)
{
- return 1;
+ return true;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 7646637221f3..97f3e88aead4 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -9,5 +9,6 @@
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
+#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
#endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 50777b5b1e4c..a43f41cb3c43 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -60,6 +60,10 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
extern int shmem_unuse(swp_entry_t entry, struct page *page);
+extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
+extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+
static inline struct page *shmem_read_mapping_page(
struct address_space *mapping, pgoff_t index)
{
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2037a861e367..3ffee7422012 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -86,6 +86,11 @@
#else
# define SLAB_FAILSLAB 0x00000000UL
#endif
+#ifdef CONFIG_MEMCG_KMEM
+# define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */
+#else
+# define SLAB_ACCOUNT 0x00000000UL
+#endif
/* The following flags affect the page allocator grouping pages by mobility */
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7ba7dccaf0e7..066bd21765ad 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -287,7 +287,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
extern unsigned long totalreserve_pages;
-extern unsigned long dirty_balance_reserve;
extern unsigned long nr_free_buffer_pages(void);
extern unsigned long nr_free_pagecache_pages(void);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..b4c2a485b28a 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -56,9 +56,10 @@ extern long do_no_restart_syscall(struct restart_block *parm);
#ifdef __KERNEL__
#ifdef CONFIG_DEBUG_STACK_USAGE
-# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
+# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
+ __GFP_ZERO)
#else
-# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
+# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK)
#endif
/*
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3bff87a25a42..d1f1d338af20 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -14,7 +14,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */
#define VM_ALLOC 0x00000002 /* vmalloc() */
#define VM_MAP 0x00000004 /* vmap()ed pages */
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
-#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 3e4535876d37..3347cc3ec0ab 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -12,6 +12,9 @@
struct vmpressure {
unsigned long scanned;
unsigned long reclaimed;
+
+ unsigned long tree_scanned;
+ unsigned long tree_reclaimed;
/* The lock is used to keep the scanned/reclaimed above in sync. */
struct spinlock sr_lock;
@@ -26,7 +29,7 @@ struct vmpressure {
struct mem_cgroup;
#ifdef CONFIG_MEMCG
-extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
@@ -40,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd);
#else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed) {}
static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
int prio) {}
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3e5d9075960f..73fae8c4a5fb 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
+void quiet_vmstat(void);
void cpu_vm_stats_fold(int cpu);
void refresh_zone_stat_thresholds(void);
@@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page,
static inline void refresh_zone_stat_thresholds(void) { }
static inline void cpu_vm_stats_fold(int cpu) { }
+static inline void quiet_vmstat(void) { }
static inline void drain_zonestat(struct zone *zone,
struct per_cpu_pageset *pset) { }
diff --git a/include/net/sock.h b/include/net/sock.h
index e830c1006935..b9e7b3d863a0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -71,22 +71,6 @@
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
-struct cgroup;
-struct cgroup_subsys;
-#ifdef CONFIG_NET
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
-#else
-static inline
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
- return 0;
-}
-static inline
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-}
-#endif
/*
* This structure really needs to be cleaned up.
* Most of it is for TCP, and not used by any of
@@ -245,7 +229,6 @@ struct sock_common {
/* public: */
};
-struct cg_proto;
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -310,7 +293,7 @@ struct cg_proto;
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_cgrp_data: cgroup data for this cgroup
- * @sk_cgrp: this socket's cgroup-specific proto data
+ * @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
@@ -446,7 +429,7 @@ struct sock {
void *sk_security;
#endif
struct sock_cgroup_data sk_cgrp_data;
- struct cg_proto *sk_cgrp;
+ struct mem_cgroup *sk_memcg;
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
@@ -1096,23 +1079,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
-extern struct static_key memcg_socket_limit_enabled;
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
- struct cg_proto *cg_proto)
-{
- return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
-}
-#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
-#else
-#define mem_cgroup_sockets_enabled 0
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
- struct cg_proto *cg_proto)
-{
- return NULL;
-}
-#endif
-
static inline bool sk_stream_memory_free(const struct sock *sk)
{
if (sk->sk_wmem_queued >= sk->sk_sndbuf)
@@ -1139,8 +1105,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
if (!sk->sk_prot->memory_pressure)
return false;
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- return !!sk->sk_cgrp->memory_pressure;
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ return true;
return !!*sk->sk_prot->memory_pressure;
}
@@ -1154,15 +1121,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
if (*memory_pressure)
*memory_pressure = 0;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
- struct cg_proto *cg_proto = sk->sk_cgrp;
- struct proto *prot = sk->sk_prot;
-
- for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
- cg_proto->memory_pressure = 0;
- }
-
}
static inline void sk_enter_memory_pressure(struct sock *sk)
@@ -1170,116 +1128,46 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
if (!sk->sk_prot->enter_memory_pressure)
return;
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
- struct cg_proto *cg_proto = sk->sk_cgrp;
- struct proto *prot = sk->sk_prot;
-
- for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
- cg_proto->memory_pressure = 1;
- }
-
sk->sk_prot->enter_memory_pressure(sk);
}
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long *prot = sk->sk_prot->sysctl_mem;
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- prot = sk->sk_cgrp->sysctl_mem;
- return prot[index];
-}
-
-static inline void memcg_memory_allocated_add(struct cg_proto *prot,
- unsigned long amt,
- int *parent_status)
-{
- page_counter_charge(&prot->memory_allocated, amt);
-
- if (page_counter_read(&prot->memory_allocated) >
- prot->memory_allocated.limit)
- *parent_status = OVER_LIMIT;
-}
-
-static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
- unsigned long amt)
-{
- page_counter_uncharge(&prot->memory_allocated, amt);
+ return sk->sk_prot->sysctl_mem[index];
}
static inline long
sk_memory_allocated(const struct sock *sk)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- return page_counter_read(&sk->sk_cgrp->memory_allocated);
-
- return atomic_long_read(prot->memory_allocated);
+ return atomic_long_read(sk->sk_prot->memory_allocated);
}
static inline long
-sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
+sk_memory_allocated_add(struct sock *sk, int amt)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
- memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
- /* update the root cgroup regardless */
- atomic_long_add_return(amt, prot->memory_allocated);
- return page_counter_read(&sk->sk_cgrp->memory_allocated);
- }
-
- return atomic_long_add_return(amt, prot->memory_allocated);
+ return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
}
static inline void
sk_memory_allocated_sub(struct sock *sk, int amt)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- memcg_memory_allocated_sub(sk->sk_cgrp, amt);
-
- atomic_long_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, sk->sk_prot->memory_allocated);
}
static inline void sk_sockets_allocated_dec(struct sock *sk)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
- struct cg_proto *cg_proto = sk->sk_cgrp;
-
- for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
- percpu_counter_dec(&cg_proto->sockets_allocated);
- }
-
- percpu_counter_dec(prot->sockets_allocated);
+ percpu_counter_dec(sk->sk_prot->sockets_allocated);
}
static inline void sk_sockets_allocated_inc(struct sock *sk)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
- struct cg_proto *cg_proto = sk->sk_cgrp;
-
- for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
- percpu_counter_inc(&cg_proto->sockets_allocated);
- }
-
- percpu_counter_inc(prot->sockets_allocated);
+ percpu_counter_inc(sk->sk_prot->sockets_allocated);
}
static inline int
sk_sockets_allocated_read_positive(struct sock *sk)
{
- struct proto *prot = sk->sk_prot;
-
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated);
-
- return percpu_counter_read_positive(prot->sockets_allocated);
+ return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
}
static inline int
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a80255f4ca33..8ea19977ea53 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -289,8 +289,9 @@ extern int tcp_memory_pressure;
/* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- return !!sk->sk_cgrp->memory_pressure;
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ return true;
return tcp_memory_pressure;
}
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
index 05b94d9453de..3a17b16ae8aa 100644
--- a/include/net/tcp_memcontrol.h
+++ b/include/net/tcp_memcontrol.h
@@ -1,7 +1,6 @@
#ifndef _TCP_MEMCG_H
#define _TCP_MEMCG_H
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
void tcp_destroy_cgroup(struct mem_cgroup *memcg);
#endif /* _TCP_MEMCG_H */
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
new file mode 100644
index 000000000000..97d635cabac8
--- /dev/null
+++ b/include/trace/events/huge_memory.h
@@ -0,0 +1,136 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM huge_memory
+
+#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HUGE_MEMORY_H
+
+#include <linux/tracepoint.h>
+
+#include <trace/events/gfpflags.h>
+
+#define SCAN_STATUS \
+ EM( SCAN_FAIL, "failed") \
+ EM( SCAN_SUCCEED, "succeeded") \
+ EM( SCAN_PMD_NULL, "pmd_null") \
+ EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \
+ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \
+ EM( SCAN_PAGE_RO, "no_writable_page") \
+ EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \
+ EM( SCAN_PAGE_NULL, "page_null") \
+ EM( SCAN_SCAN_ABORT, "scan_aborted") \
+ EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \
+ EM( SCAN_PAGE_LRU, "page_not_in_lru") \
+ EM( SCAN_PAGE_LOCK, "page_locked") \
+ EM( SCAN_PAGE_ANON, "page_not_anon") \
+ EM( SCAN_ANY_PROCESS, "no_process_for_page") \
+ EM( SCAN_VMA_NULL, "vma_null") \
+ EM( SCAN_VMA_CHECK, "vma_check_failed") \
+ EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \
+ EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \
+ EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\
+ EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \
+ EMe( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed")
+
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+SCAN_STATUS
+
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
+
+TRACE_EVENT(mm_khugepaged_scan_pmd,
+
+ TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+ bool referenced, int none_or_zero, int status),
+
+ TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __field(unsigned long, pfn)
+ __field(bool, writable)
+ __field(bool, referenced)
+ __field(int, none_or_zero)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->pfn = pfn;
+ __entry->writable = writable;
+ __entry->referenced = referenced;
+ __entry->none_or_zero = none_or_zero;
+ __entry->status = status;
+ ),
+
+ TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s",
+ __entry->mm,
+ __entry->pfn,
+ __entry->writable,
+ __entry->referenced,
+ __entry->none_or_zero,
+ __print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+TRACE_EVENT(mm_collapse_huge_page,
+
+ TP_PROTO(struct mm_struct *mm, int isolated, int status),
+
+ TP_ARGS(mm, isolated, status),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __field(int, isolated)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->isolated = isolated;
+ __entry->status = status;
+ ),
+
+ TP_printk("mm=%p, isolated=%d, status=%s",
+ __entry->mm,
+ __entry->isolated,
+ __print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+TRACE_EVENT(mm_collapse_huge_page_isolate,
+
+ TP_PROTO(unsigned long pfn, int none_or_zero,
+ bool referenced, bool writable, int status),
+
+ TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(int, none_or_zero)
+ __field(bool, referenced)
+ __field(bool, writable)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->none_or_zero = none_or_zero;
+ __entry->referenced = referenced;
+ __entry->writable = writable;
+ __entry->status = status;
+ ),
+
+ TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s",
+ __entry->pfn,
+ __entry->none_or_zero,
+ __entry->referenced,
+ __entry->writable,
+ __print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+#endif /* __HUGE_MEMORY_H */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h
new file mode 100644
index 000000000000..6fb644029c80
--- /dev/null
+++ b/include/trace/events/page_isolation.h
@@ -0,0 +1,38 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM page_isolation
+
+#if !defined(_TRACE_PAGE_ISOLATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_ISOLATION_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(test_pages_isolated,
+
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long fin_pfn),
+
+ TP_ARGS(start_pfn, end_pfn, fin_pfn),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, start_pfn)
+ __field(unsigned long, end_pfn)
+ __field(unsigned long, fin_pfn)
+ ),
+
+ TP_fast_assign(
+ __entry->start_pfn = start_pfn;
+ __entry->end_pfn = end_pfn;
+ __entry->fin_pfn = fin_pfn;
+ ),
+
+ TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s",
+ __entry->start_pfn, __entry->end_pfn, __entry->fin_pfn,
+ __entry->end_pfn == __entry->fin_pfn ? "success" : "fail")
+);
+
+#endif /* _TRACE_PAGE_ISOLATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f66476b96264..31763dd8db1c 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -330,10 +330,9 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
TRACE_EVENT(mm_vmscan_writepage,
- TP_PROTO(struct page *page,
- int reclaim_flags),
+ TP_PROTO(struct page *page),
- TP_ARGS(page, reclaim_flags),
+ TP_ARGS(page),
TP_STRUCT__entry(
__field(unsigned long, pfn)
@@ -342,7 +341,7 @@ TRACE_EVENT(mm_vmscan_writepage,
TP_fast_assign(
__entry->pfn = page_to_pfn(page);
- __entry->reclaim_flags = reclaim_flags;
+ __entry->reclaim_flags = trace_reclaim_flags(page);
),
TP_printk("page=%p pfn=%lu flags=%s",
@@ -353,11 +352,11 @@ TRACE_EVENT(mm_vmscan_writepage,
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
- TP_PROTO(int nid, int zid,
- unsigned long nr_scanned, unsigned long nr_reclaimed,
- int priority, int reclaim_flags),
+ TP_PROTO(struct zone *zone,
+ unsigned long nr_scanned, unsigned long nr_reclaimed,
+ int priority, int file),
- TP_ARGS(nid, zid, nr_scanned, nr_reclaimed, priority, reclaim_flags),
+ TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file),
TP_STRUCT__entry(
__field(int, nid)
@@ -369,12 +368,12 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
),
TP_fast_assign(
- __entry->nid = nid;
- __entry->zid = zid;
+ __entry->nid = zone_to_nid(zone);
+ __entry->zid = zone_idx(zone);
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
__entry->priority = priority;
- __entry->reclaim_flags = reclaim_flags;
+ __entry->reclaim_flags = trace_shrink_flags(file);
),
TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",