From df6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 8 Sep 2017 16:12:24 -0700 Subject: mm/device-public-memory: device memory cache coherent with CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from the CPU in a cache coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for the un-addressable device memory but without all the corner cases. Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Aneesh Kumar Cc: Paul E. McKenney Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: Ross Zwisler Cc: Balbir Singh Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 4 ++-- include/linux/ioport.h | 1 + include/linux/memremap.h | 21 +++++++++++++++++++++ include/linux/mm.h | 20 ++++++++++++-------- 4 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 67a03b20a2db..6d3b0b4fed4e 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma, #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) struct hmm_devmem; struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, @@ -494,7 +494,7 @@ struct hmm_device { */ struct hmm_device *hmm_device_new(void *drvdata); void hmm_device_put(struct hmm_device *hmm_device); -#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ +#endif /* CONFIG_DEVICE_PRIVATE || 
CONFIG_DEVICE_PUBLIC */ /* Below are for HMM internal use only! Not to be used by device driver! */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 3a4f69137bc2..f5cf32e80041 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -131,6 +131,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 8aa6b82679e2..f8ee1c73ad2d 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. */ enum memory_type { MEMORY_DEVICE_HOST = 0, MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, }; /* @@ -92,6 +100,8 @@ enum memory_type { * The page_free() callback is called once the page refcount reaches 1 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. 
*/ typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, unsigned long addr, @@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page) return is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } + +static inline bool is_device_public_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; +} #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -157,6 +173,11 @@ static inline bool is_device_private_page(const struct page *page) { return false; } + +static inline bool is_device_public_page(const struct page *page) +{ + return false; +} #endif /** diff --git a/include/linux/mm.h b/include/linux/mm.h index eccdab4bb44a..de66a1127db4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -800,15 +800,16 @@ static inline bool is_zone_device_page(const struct page *page) } #endif -#ifdef CONFIG_DEVICE_PRIVATE -void put_zone_device_private_page(struct page *page); +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page); #else -static inline void put_zone_device_private_page(struct page *page) +static inline void put_zone_device_private_or_public_page(struct page *page) { } -#endif +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ static inline bool is_device_private_page(const struct page *page); +static inline bool is_device_public_page(const struct page *page); DECLARE_STATIC_KEY_FALSE(device_private_key); @@ -834,8 +835,9 @@ static inline void put_page(struct page *page) * include/linux/memremap.h and HMM for details. 
*/ if (static_branch_unlikely(&device_private_key) && - unlikely(is_device_private_page(page))) { - put_zone_device_private_page(page); + unlikely(is_device_private_page(page) || + is_device_public_page(page))) { + put_zone_device_private_or_public_page(page); return; } @@ -1224,8 +1226,10 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ }; -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); +struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, bool with_public_device); +#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false) + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); -- cgit v1.2.3