diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-15 11:41:44 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-15 11:41:44 -0800 |
commit | 875fc4f5ddf35605581f9a5900c14afef48611f2 (patch) | |
tree | e237a28a71a5d1e72eaf0ecda737eb5c8614c72c /include | |
parent | 7d1fc01afc5af35e5197e0e75abe900f6bd279b8 (diff) | |
parent | 7dfa4612204b511c934ca2a0e4f306f9981bd9aa (diff) | |
download | linux-875fc4f5ddf35605581f9a5900c14afef48611f2.tar.gz linux-875fc4f5ddf35605581f9a5900c14afef48611f2.tar.bz2 linux-875fc4f5ddf35605581f9a5900c14afef48611f2.zip |
Merge branch 'akpm' (patches from Andrew)
Merge first patch-bomb from Andrew Morton:
- A few hotfixes which missed 4.4 becasue I was asleep. cc'ed to
-stable
- A few misc fixes
- OCFS2 updates
- Part of MM. Including pretty large changes to page-flags handling
and to thp management which have been buffered up for 2-3 cycles now.
I have a lot of MM material this time.
[ It turns out the THP part wasn't quite ready, so that got dropped from
this series - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
zsmalloc: reorganize struct size_class to pack 4 bytes hole
mm/zbud.c: use list_last_entry() instead of list_tail_entry()
zram/zcomp: do not zero out zcomp private pages
zram: pass gfp from zcomp frontend to backend
zram: try vmalloc() after kmalloc()
zram/zcomp: use GFP_NOIO to allocate streams
mm: add tracepoint for scanning pages
drivers/base/memory.c: fix kernel warning during memory hotplug on ppc64
mm/page_isolation: use macro to judge the alignment
mm: fix noisy sparse warning in LIBCFS_ALLOC_PRE()
mm: rework virtual memory accounting
include/linux/memblock.h: fix ordering of 'flags' argument in comments
mm: move lru_to_page to mm_inline.h
Documentation/filesystems: describe the shared memory usage/accounting
memory-hotplug: don't BUG() in register_memory_resource()
hugetlb: make mm and fs code explicitly non-modular
mm/swapfile.c: use list_for_each_entry_safe in free_swap_count_continuations
mm: /proc/pid/clear_refs: no need to clear VM_SOFTDIRTY in clear_soft_dirty_pmd()
mm: make sure isolate_lru_page() is never called for tail page
vmstat: make vmstat_updater deferrable again and shut down on idle
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/memory_model.h | 4 | ||||
-rw-r--r-- | include/linux/dcache.h | 4 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 5 | ||||
-rw-r--r-- | include/linux/gfp.h | 22 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 10 | ||||
-rw-r--r-- | include/linux/memblock.h | 13 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 102 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 46 | ||||
-rw-r--r-- | include/linux/mm_inline.h | 2 | ||||
-rw-r--r-- | include/linux/mm_types.h | 9 | ||||
-rw-r--r-- | include/linux/mmzone.h | 37 | ||||
-rw-r--r-- | include/linux/pfn.h | 1 | ||||
-rw-r--r-- | include/linux/shmem_fs.h | 4 | ||||
-rw-r--r-- | include/linux/slab.h | 5 | ||||
-rw-r--r-- | include/linux/swap.h | 1 | ||||
-rw-r--r-- | include/linux/thread_info.h | 5 | ||||
-rw-r--r-- | include/linux/vmalloc.h | 1 | ||||
-rw-r--r-- | include/linux/vmpressure.h | 7 | ||||
-rw-r--r-- | include/linux/vmstat.h | 2 | ||||
-rw-r--r-- | include/net/sock.h | 138 | ||||
-rw-r--r-- | include/net/tcp.h | 5 | ||||
-rw-r--r-- | include/net/tcp_memcontrol.h | 1 | ||||
-rw-r--r-- | include/trace/events/huge_memory.h | 136 | ||||
-rw-r--r-- | include/trace/events/page_isolation.h | 38 | ||||
-rw-r--r-- | include/trace/events/vmscan.h | 21 |
26 files changed, 371 insertions, 250 deletions
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 4b4b056a6eb0..5148150cc80b 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -1,6 +1,8 @@ #ifndef __ASM_MEMORY_MODEL_H #define __ASM_MEMORY_MODEL_H +#include <linux/pfn.h> + #ifndef __ASSEMBLY__ #if defined(CONFIG_FLATMEM) @@ -72,7 +74,7 @@ /* * Convert a physical address to a Page Frame Number and back */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) +#define __phys_to_pfn(paddr) PHYS_PFN(paddr) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define page_to_pfn __page_to_pfn diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d67ae119cf4e..7781ce110503 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -27,10 +27,10 @@ struct vfsmount; /* The hash is always the low bits of hash_len */ #ifdef __LITTLE_ENDIAN - #define HASH_LEN_DECLARE u32 hash; u32 len; + #define HASH_LEN_DECLARE u32 hash; u32 len #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) #else - #define HASH_LEN_DECLARE u32 len; u32 hash; + #define HASH_LEN_DECLARE u32 len; u32 hash #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) #endif diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 533c4408529a..6b7e89f45aa4 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -220,7 +220,10 @@ struct fsnotify_mark { /* List of marks by group->i_fsnotify_marks. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ - struct list_head g_list; + union { + struct list_head g_list; + struct rcu_head g_rcu; + }; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; /* List of marks for inode / vfsmount [obj_lock] */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8942af0813e3..28ad5f6494b0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,7 +30,7 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u -#define ___GFP_NOACCOUNT 0x100000u +#define ___GFP_ACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -73,11 +73,15 @@ struct vm_area_struct; * * __GFP_THISNODE forces the allocation to be satisified from the requested * node with no fallbacks or placement policy enforcements. + * + * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant + * to kmem allocations). */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) /* * Watermark modifiers -- controls access to emergency reserves @@ -104,7 +108,6 @@ struct vm_area_struct; #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) -#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* * Reclaim modifiers @@ -197,6 +200,9 @@ struct vm_area_struct; * GFP_KERNEL is typical for kernel-internal allocations. The caller requires * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. * + * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * * GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. * @@ -236,6 +242,7 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) @@ -271,7 +278,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); + return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM @@ -377,10 +384,11 @@ static inline enum zone_type gfp_zone(gfp_t flags) static inline int gfp_zonelist(gfp_t flags) { - if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE)) - return 1; - - return 0; +#ifdef CONFIG_NUMA + if (unlikely(flags & __GFP_THISNODE)) + return ZONELIST_NOFALLBACK; +#endif + return ZONELIST_FALLBACK; } /* diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b0eb06423d5e..e76574d8f9b5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -263,20 +263,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags, int page_size_log); -static inline int is_file_hugepages(struct file *file) +static inline bool is_file_hugepages(struct file *file) { if (file->f_op == &hugetlbfs_file_operations) - return 1; - if (is_file_shm_hugepages(file)) - return 1; + return true; - return 0; + return is_file_shm_hugepages(file); } #else /* !CONFIG_HUGETLBFS */ -#define is_file_hugepages(file) 0 +#define is_file_hugepages(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fec66f86eeff..173fb44e22f1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -216,10 +216,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, * for_each_free_mem_range - iterate through free memblock areas * @i: u64 used as loop variable * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL - * @flags: pick from blocks based on memory attributes * * Walks over free (memory && !reserved) areas of memblock. Available as * soon as memblock is initialized. @@ -232,10 +232,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, * for_each_free_mem_range_reverse - rev-iterate through free memblock areas * @i: u64 used as loop variable * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL - * @flags: pick from blocks based on memory attributes * * Walks over free (memory && !reserved) areas of memblock in reverse * order. Available as soon as memblock is initialized. @@ -325,10 +325,10 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); -int memblock_is_memory(phys_addr_t addr); +bool memblock_is_memory(phys_addr_t addr); int memblock_is_map_memory(phys_addr_t addr); int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); -int memblock_is_reserved(phys_addr_t addr); +bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void __memblock_dump_all(void); @@ -399,6 +399,11 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ region++) +#define for_each_memblock_type(memblock_type, rgn) \ + idx = 0; \ + rgn = &memblock_type->regions[idx]; \ + for (idx = 0; idx < memblock_type->cnt; \ + idx++,rgn = &memblock_type->regions[idx]) #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index cd0e2413c358..2292468f2a30 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,32 +85,10 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { - /* Currently active and new sockets should be assigned to cgroups */ - MEMCG_SOCK_ACTIVE, - /* It was ever activated; we must disarm static keys on destruction */ - MEMCG_SOCK_ACTIVATED, -}; - struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ - struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; - long sysctl_mem[3]; - unsigned long flags; - /* - * memcg field is used to find which memcg we belong directly - * Each memcg struct can hold more than one cg_proto, so container_of - * won't really cut. - * - * The elegant solution would be having an inverse function to - * proto_cgroup in struct proto, but that means polluting the structure - * for everybody, instead of just for memcg users. - */ - struct mem_cgroup *memcg; + bool active; }; #ifdef CONFIG_MEMCG @@ -192,6 +170,9 @@ struct mem_cgroup { unsigned long low; unsigned long high; + /* Range enforcement for interrupt charges */ + struct work_struct high_work; + unsigned long soft_limit; /* vmpressure notifications */ @@ -268,6 +249,10 @@ struct mem_cgroup { struct wb_domain cgwb_domain; #endif +#ifdef CONFIG_INET + unsigned long socket_pressure; +#endif + /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; @@ -275,7 +260,8 @@ struct mem_cgroup { struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; -extern struct cgroup_subsys_state *mem_cgroup_root_css; + +extern struct mem_cgroup *root_mem_cgroup; /** * mem_cgroup_events - count memory events against a cgroup @@ -308,18 +294,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; } +#define mem_cgroup_from_counter(counter, member) \ + container_of(counter, struct mem_cgroup, member) + struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +/** + * parent_mem_cgroup - find the accounting parent of a memcg + * @memcg: memcg whose parent to find + * + * Returns the parent memcg, or NULL if this is the root or the memory + * controller is in legacy no-hierarchy mode. + */ +static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) +{ + if (!memcg->memory.parent) + return NULL; + return mem_cgroup_from_counter(memcg->memory.parent, memory); +} + static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root) { @@ -671,12 +673,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ -enum { - UNDER_LIMIT, - SOFT_LIMIT, - OVER_LIMIT, -}; - #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); @@ -703,20 +699,35 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); -#else -static inline void sock_update_memcg(struct sock *sk) +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +#if defined(CONFIG_MEMCG) && defined(CONFIG_INET) +extern struct static_key_false memcg_sockets_enabled_key; +#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { +#ifdef CONFIG_MEMCG_KMEM + if (memcg->tcp_mem.memory_pressure) + return true; +#endif + do { + if (time_before(jiffies, memcg->socket_pressure)) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + return false; } -static inline void sock_release_memcg(struct sock *sk) +#else +#define mem_cgroup_sockets_enabled 0 +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { + return false; } -#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ +#endif #ifdef CONFIG_MEMCG_KMEM -extern struct static_key memcg_kmem_enabled_key; +extern struct static_key_false memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; void memcg_get_cache_ids(void); @@ -732,7 +743,7 @@ void memcg_put_cache_ids(void); static inline bool memcg_kmem_enabled(void) { - return static_key_false(&memcg_kmem_enabled_key); + return static_branch_unlikely(&memcg_kmem_enabled_key); } static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) @@ -766,15 +777,13 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); +struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -static inline bool __memcg_kmem_bypass(gfp_t gfp) +static inline bool __memcg_kmem_bypass(void) { if (!memcg_kmem_enabled()) return true; - if (gfp & __GFP_NOACCOUNT) - return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; @@ -791,7 +800,9 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) static __always_inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { - if (__memcg_kmem_bypass(gfp)) + if (__memcg_kmem_bypass()) + return 0; + if (!(gfp & __GFP_ACCOUNT)) return 0; return __memcg_kmem_charge(page, gfp, order); } @@ -810,16 +821,15 @@ static __always_inline void memcg_kmem_uncharge(struct page *page, int order) /** * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation * @cachep: the original global kmem cache - * @gfp: allocation flags. * * All memory allocated from a per-memcg cache is charged to the owner memcg. */ static __always_inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (__memcg_kmem_bypass(gfp)) + if (__memcg_kmem_bypass()) return cachep; - return __memcg_kmem_get_cache(cachep); + return __memcg_kmem_get_cache(cachep, gfp); } static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 3d385c81c153..2696c1f05ed1 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -122,7 +122,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - spinlock_t lock; + rwlock_t lock; }; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); diff --git a/include/linux/mm.h b/include/linux/mm.h index 00bad7793788..839d9e9a1c38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -51,6 +51,17 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS +extern const int mmap_rnd_bits_min; +extern const int mmap_rnd_bits_max; +extern int mmap_rnd_bits __read_mostly; +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS +extern const int mmap_rnd_compat_bits_min; +extern const int mmap_rnd_compat_bits_max; +extern int mmap_rnd_compat_bits __read_mostly; +#endif + #include <asm/page.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -225,10 +236,14 @@ extern pgprot_t protection_map[16]; * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * + * MM layer fills up gfp_mask for page allocations but fault handler might + * alter it if its implementation requires a different allocation context. + * * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ + gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ void __user *virtual_address; /* Faulting virtual address */ @@ -1361,10 +1376,26 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member) atomic_long_dec(&mm->rss_stat.count[member]); } +/* Optimized variant when page is already known not to be PageAnon */ +static inline int mm_counter_file(struct page *page) +{ + if (PageSwapBacked(page)) + return MM_SHMEMPAGES; + return MM_FILEPAGES; +} + +static inline int mm_counter(struct page *page) +{ + if (PageAnon(page)) + return MM_ANONPAGES; + return mm_counter_file(page); +} + static inline unsigned long get_mm_rss(struct mm_struct *mm) { return get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_ANONPAGES); + get_mm_counter(mm, MM_ANONPAGES) + + get_mm_counter(mm, MM_SHMEMPAGES); } static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) @@ -1898,7 +1929,9 @@ extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); -extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); +extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); +extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); + extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, @@ -2116,15 +2149,6 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); -#ifdef CONFIG_PROC_FS -void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); -#else -static inline void vm_stat_account(struct mm_struct *mm, - unsigned long flags, struct file *file, long pages) -{ - mm->total_vm += pages; -} -#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_DEBUG_PAGEALLOC extern bool _debug_pagealloc_enabled; diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index cf55945c83fb..712e8c37a200 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -100,4 +100,6 @@ static __always_inline enum lru_list page_lru(struct page *page) return lru; } +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f8d1492a114f..6bc9a0ce2253 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -369,9 +369,10 @@ struct core_state { }; enum { - MM_FILEPAGES, - MM_ANONPAGES, - MM_SWAPENTS, + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ NR_MM_COUNTERS }; @@ -426,7 +427,7 @@ struct mm_struct { unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ unsigned long pinned_vm; /* Refcount permanently increased */ - unsigned long shared_vm; /* Shared pages (files) */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ unsigned long def_flags; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e23a9e704536..33bb1b19273e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -195,11 +195,6 @@ static inline int is_active_lru(enum lru_list lru) return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } -static inline int is_unevictable_lru(enum lru_list lru) -{ - return (lru == LRU_UNEVICTABLE); -} - struct zone_reclaim_stat { /* * The pageout code in vmscan.c keeps track of how many of the @@ -361,10 +356,10 @@ struct zone { struct per_cpu_pageset __percpu *pageset; /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. + * This is a per-zone reserve of pages that are not available + * to userspace allocations. */ - unsigned long dirty_balance_reserve; + unsigned long totalreserve_pages; #ifndef CONFIG_SPARSEMEM /* @@ -576,19 +571,17 @@ static inline bool zone_is_empty(struct zone *zone) /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) +enum { + ZONELIST_FALLBACK, /* zonelist with fallback */ #ifdef CONFIG_NUMA - -/* - * The NUMA zonelists are doubled because we need zonelists that restrict the - * allocations to a single node for __GFP_THISNODE. - * - * [0] : Zonelist with fallback - * [1] : No fallback (__GFP_THISNODE) - */ -#define MAX_ZONELISTS 2 -#else -#define MAX_ZONELISTS 1 + /* + * The NUMA zonelists are doubled because we need zonelists that + * restrict the allocations to a single node for __GFP_THISNODE. + */ + ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ #endif + MAX_ZONELISTS +}; /* * This struct contains information about a zone in a zonelist. It is stored @@ -1207,13 +1200,13 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); * the zone and PFN linkages are still valid. This is expensive, but walkers * of the full memmap are extremely rare. */ -int memmap_valid_within(unsigned long pfn, +bool memmap_valid_within(unsigned long pfn, struct page *page, struct zone *zone); #else -static inline int memmap_valid_within(unsigned long pfn, +static inline bool memmap_valid_within(unsigned long pfn, struct page *page, struct zone *zone) { - return 1; + return true; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 7646637221f3..97f3e88aead4 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -9,5 +9,6 @@ #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) #endif diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 50777b5b1e4c..a43f41cb3c43 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -60,6 +60,10 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); +extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); +extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, + pgoff_t start, pgoff_t end); + static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) { diff --git a/include/linux/slab.h b/include/linux/slab.h index 2037a861e367..3ffee7422012 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -86,6 +86,11 @@ #else # define SLAB_FAILSLAB 0x00000000UL #endif +#ifdef CONFIG_MEMCG_KMEM +# define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */ +#else +# define SLAB_ACCOUNT 0x00000000UL +#endif /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 7ba7dccaf0e7..066bd21765ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -287,7 +287,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node) /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern unsigned long dirty_balance_reserve; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ff307b548ed3..b4c2a485b28a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -56,9 +56,10 @@ extern long do_no_restart_syscall(struct restart_block *parm); #ifdef __KERNEL__ #ifdef CONFIG_DEBUG_STACK_USAGE -# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ + __GFP_ZERO) #else -# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK) #endif /* diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3bff87a25a42..d1f1d338af20 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -14,7 +14,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ -#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3e4535876d37..3347cc3ec0ab 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -12,6 +12,9 @@ struct vmpressure { unsigned long scanned; unsigned long reclaimed; + + unsigned long tree_scanned; + unsigned long tree_reclaimed; /* The lock is used to keep the scanned/reclaimed above in sync. */ struct spinlock sr_lock; @@ -26,7 +29,7 @@ struct vmpressure { struct mem_cgroup; #ifdef CONFIG_MEMCG -extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, +extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, unsigned long scanned, unsigned long reclaimed); extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); @@ -40,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg, extern void vmpressure_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd); #else -static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, +static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, unsigned long scanned, unsigned long reclaimed) {} static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) {} diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3e5d9075960f..73fae8c4a5fb 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); +void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); @@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page, static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } +static inline void quiet_vmstat(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) { } diff --git a/include/net/sock.h b/include/net/sock.h index e830c1006935..b9e7b3d863a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -71,22 +71,6 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> -struct cgroup; -struct cgroup_subsys; -#ifdef CONFIG_NET -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss); -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg); -#else -static inline -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) -{ - return 0; -} -static inline -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) -{ -} -#endif /* * This structure really needs to be cleaned up. * Most of it is for TCP, and not used by any of @@ -245,7 +229,6 @@ struct sock_common { /* public: */ }; -struct cg_proto; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -310,7 +293,7 @@ struct cg_proto; * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup - * @sk_cgrp: this socket's cgroup-specific proto data + * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed @@ -446,7 +429,7 @@ struct sock { void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; - struct cg_proto *sk_cgrp; + struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); @@ -1096,23 +1079,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) -extern struct static_key memcg_socket_limit_enabled; -static inline struct cg_proto *parent_cg_proto(struct proto *proto, - struct cg_proto *cg_proto) -{ - return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); -} -#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) -#else -#define mem_cgroup_sockets_enabled 0 -static inline struct cg_proto *parent_cg_proto(struct proto *proto, - struct cg_proto *cg_proto) -{ - return NULL; -} -#endif - static inline bool sk_stream_memory_free(const struct sock *sk) { if (sk->sk_wmem_queued >= sk->sk_sndbuf) @@ -1139,8 +1105,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return !!sk->sk_cgrp->memory_pressure; + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) + return true; return !!*sk->sk_prot->memory_pressure; } @@ -1154,15 +1121,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk) if (*memory_pressure) *memory_pressure = 0; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - struct proto *prot = sk->sk_prot; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->memory_pressure = 0; - } - } static inline void sk_enter_memory_pressure(struct sock *sk) @@ -1170,116 +1128,46 @@ static inline void sk_enter_memory_pressure(struct sock *sk) if (!sk->sk_prot->enter_memory_pressure) return; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - struct proto *prot = sk->sk_prot; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->memory_pressure = 1; - } - sk->sk_prot->enter_memory_pressure(sk); } static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long *prot = sk->sk_prot->sysctl_mem; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - prot = sk->sk_cgrp->sysctl_mem; - return prot[index]; -} - -static inline void memcg_memory_allocated_add(struct cg_proto *prot, - unsigned long amt, - int *parent_status) -{ - page_counter_charge(&prot->memory_allocated, amt); - - if (page_counter_read(&prot->memory_allocated) > - prot->memory_allocated.limit) - *parent_status = OVER_LIMIT; -} - -static inline void memcg_memory_allocated_sub(struct cg_proto *prot, - unsigned long amt) -{ - page_counter_uncharge(&prot->memory_allocated, amt); + return sk->sk_prot->sysctl_mem[index]; } static inline long sk_memory_allocated(const struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return page_counter_read(&sk->sk_cgrp->memory_allocated); - - return atomic_long_read(prot->memory_allocated); + return atomic_long_read(sk->sk_prot->memory_allocated); } static inline long -sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) +sk_memory_allocated_add(struct sock *sk, int amt) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status); - /* update the root cgroup regardless */ - atomic_long_add_return(amt, prot->memory_allocated); - return page_counter_read(&sk->sk_cgrp->memory_allocated); - } - - return atomic_long_add_return(amt, prot->memory_allocated); + return atomic_long_add_return(amt, sk->sk_prot->memory_allocated); } static inline void sk_memory_allocated_sub(struct sock *sk, int amt) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - memcg_memory_allocated_sub(sk->sk_cgrp, amt); - - atomic_long_sub(amt, prot->memory_allocated); + atomic_long_sub(amt, sk->sk_prot->memory_allocated); } static inline void sk_sockets_allocated_dec(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_dec(&cg_proto->sockets_allocated); - } - - percpu_counter_dec(prot->sockets_allocated); + percpu_counter_dec(sk->sk_prot->sockets_allocated); } static inline void sk_sockets_allocated_inc(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_inc(&cg_proto->sockets_allocated); - } - - percpu_counter_inc(prot->sockets_allocated); + percpu_counter_inc(sk->sk_prot->sockets_allocated); } static inline int sk_sockets_allocated_read_positive(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated); - - return percpu_counter_read_positive(prot->sockets_allocated); + return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); } static inline int diff --git a/include/net/tcp.h b/include/net/tcp.h index a80255f4ca33..8ea19977ea53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -289,8 +289,9 @@ extern int tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return !!sk->sk_cgrp->memory_pressure; + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) + return true; return tcp_memory_pressure; } diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 05b94d9453de..3a17b16ae8aa 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@ -1,7 +1,6 @@ #ifndef _TCP_MEMCG_H #define _TCP_MEMCG_H -struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); void tcp_destroy_cgroup(struct mem_cgroup *memcg); #endif /* _TCP_MEMCG_H */ diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h new file mode 100644 index 000000000000..97d635cabac8 --- /dev/null +++ b/include/trace/events/huge_memory.h @@ -0,0 +1,136 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM huge_memory + +#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HUGE_MEMORY_H + +#include <linux/tracepoint.h> + +#include <trace/events/gfpflags.h> + +#define SCAN_STATUS \ + EM( SCAN_FAIL, "failed") \ + EM( SCAN_SUCCEED, "succeeded") \ + EM( SCAN_PMD_NULL, "pmd_null") \ + EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ + EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ + EM( SCAN_PAGE_RO, "no_writable_page") \ + EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \ + EM( SCAN_PAGE_NULL, "page_null") \ + EM( SCAN_SCAN_ABORT, "scan_aborted") \ + EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \ + EM( SCAN_PAGE_LRU, "page_not_in_lru") \ + EM( SCAN_PAGE_LOCK, "page_locked") \ + EM( SCAN_PAGE_ANON, "page_not_anon") \ + EM( SCAN_ANY_PROCESS, "no_process_for_page") \ + EM( SCAN_VMA_NULL, "vma_null") \ + EM( SCAN_VMA_CHECK, "vma_check_failed") \ + EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \ + EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \ + EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ + EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ + EMe( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") + +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +SCAN_STATUS + +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + +TRACE_EVENT(mm_khugepaged_scan_pmd, + + TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable, + bool referenced, int none_or_zero, int status), + + TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(unsigned long, pfn) + __field(bool, writable) + __field(bool, referenced) + __field(int, none_or_zero) + __field(int, status) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->pfn = pfn; + __entry->writable = writable; + __entry->referenced = referenced; + __entry->none_or_zero = none_or_zero; + __entry->status = status; + ), + + TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s", + __entry->mm, + __entry->pfn, + __entry->writable, + __entry->referenced, + __entry->none_or_zero, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +TRACE_EVENT(mm_collapse_huge_page, + + TP_PROTO(struct mm_struct *mm, int isolated, int status), + + TP_ARGS(mm, isolated, status), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(int, isolated) + __field(int, status) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->isolated = isolated; + __entry->status = status; + ), + + TP_printk("mm=%p, isolated=%d, status=%s", + __entry->mm, + __entry->isolated, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +TRACE_EVENT(mm_collapse_huge_page_isolate, + + TP_PROTO(unsigned long pfn, int none_or_zero, + bool referenced, bool writable, int status), + + TP_ARGS(pfn, none_or_zero, referenced, writable, status), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(int, none_or_zero) + __field(bool, referenced) + __field(bool, writable) + __field(int, status) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->none_or_zero = none_or_zero; + __entry->referenced = referenced; + __entry->writable = writable; + __entry->status = status; + ), + + TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s", + __entry->pfn, + __entry->none_or_zero, + __entry->referenced, + __entry->writable, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +#endif /* __HUGE_MEMORY_H */ +#include <trace/define_trace.h> diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h new file mode 100644 index 000000000000..6fb644029c80 --- /dev/null +++ b/include/trace/events/page_isolation.h @@ -0,0 +1,38 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_isolation + +#if !defined(_TRACE_PAGE_ISOLATION_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_ISOLATION_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(test_pages_isolated, + + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long fin_pfn), + + TP_ARGS(start_pfn, end_pfn, fin_pfn), + + TP_STRUCT__entry( + __field(unsigned long, start_pfn) + __field(unsigned long, end_pfn) + __field(unsigned long, fin_pfn) + ), + + TP_fast_assign( + __entry->start_pfn = start_pfn; + __entry->end_pfn = end_pfn; + __entry->fin_pfn = fin_pfn; + ), + + TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s", + __entry->start_pfn, __entry->end_pfn, __entry->fin_pfn, + __entry->end_pfn == __entry->fin_pfn ? "success" : "fail") +); + +#endif /* _TRACE_PAGE_ISOLATION_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f66476b96264..31763dd8db1c 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -330,10 +330,9 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, TRACE_EVENT(mm_vmscan_writepage, - TP_PROTO(struct page *page, - int reclaim_flags), + TP_PROTO(struct page *page), - TP_ARGS(page, reclaim_flags), + TP_ARGS(page), TP_STRUCT__entry( __field(unsigned long, pfn) @@ -342,7 +341,7 @@ TRACE_EVENT(mm_vmscan_writepage, TP_fast_assign( __entry->pfn = page_to_pfn(page); - __entry->reclaim_flags = reclaim_flags; + __entry->reclaim_flags = trace_reclaim_flags(page); ), TP_printk("page=%p pfn=%lu flags=%s", @@ -353,11 +352,11 @@ TRACE_EVENT(mm_vmscan_writepage, TRACE_EVENT(mm_vmscan_lru_shrink_inactive, - TP_PROTO(int nid, int zid, - unsigned long nr_scanned, unsigned long nr_reclaimed, - int priority, int reclaim_flags), + TP_PROTO(struct zone *zone, + unsigned long nr_scanned, unsigned long nr_reclaimed, + int priority, int file), - TP_ARGS(nid, zid, nr_scanned, nr_reclaimed, priority, reclaim_flags), + TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file), TP_STRUCT__entry( __field(int, nid) @@ -369,12 +368,12 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, ), TP_fast_assign( - __entry->nid = nid; - __entry->zid = zid; + __entry->nid = zone_to_nid(zone); + __entry->zid = zone_idx(zone); __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; __entry->priority = priority; - __entry->reclaim_flags = reclaim_flags; + __entry->reclaim_flags = trace_shrink_flags(file); ), TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s", |