diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-08 17:21:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-08 17:21:52 -0700 |
commit | 7d3bf613e99abbd96ac7b90ee3694a246c975021 (patch) | |
tree | 084e4d900025ce3459702d3a8c05ead860c67c64 /drivers/nvdimm/pmem.c | |
parent | a3818841bd5e9b4a7e0e732c19cf3a632fcb525e (diff) | |
parent | 930218affeadd1325ea17e053f0dcecf218f5a4f (diff) | |
download | linux-stable-7d3bf613e99abbd96ac7b90ee3694a246c975021.tar.gz linux-stable-7d3bf613e99abbd96ac7b90ee3694a246c975021.tar.bz2 linux-stable-7d3bf613e99abbd96ac7b90ee3694a246c975021.zip |
Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"This adds a user for the new 'bytes-remaining' updates to
memcpy_mcsafe() that you already received through Ingo via the
x86-dax- for-linus pull.
Not included here, but still targeting this cycle, is support for
handling memory media errors (poison) consumed via userspace dax
mappings.
Summary:
- DAX broke a fundamental assumption of truncate of file mapped
pages. The truncate path assumed that it is safe to disconnect a
pinned page from a file and let the filesystem reclaim the physical
block. With DAX the page is equivalent to the filesystem block.
Introduce dax_layout_busy_page() to enable filesystems to wait for
pinned DAX pages to be released. Without this wait a filesystem
could allocate blocks under active device-DMA to a new file.
- DAX arranges for the block layer to be bypassed and uses
dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
However, the memcpy_mcsafe() facility is available through the pmem
block driver. In order to safely handle media errors, via the DAX
block-layer bypass, introduce copy_to_iter_mcsafe().
- Fix cache management policy relative to the ACPI NFIT Platform
Capabilities Structure to properly elide cache flushes when they
are not necessary. The table indicates whether CPU caches are
power-fail protected. Clarify that a deep flush is always performed
on REQ_{FUA,PREFLUSH} requests"
* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
dax: Use dax_write_cache* helpers
libnvdimm, pmem: Do not flush power-fail protected CPU caches
libnvdimm, pmem: Unconditionally deep flush on *sync
libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
acpi, nfit: Remove ecc_unit_size
dax: dax_insert_mapping_entry always succeeds
libnvdimm, e820: Register all pmem resources
libnvdimm: Debug probe times
linvdimm, pmem: Preserve read-only setting for pmem devices
x86, nfit_test: Add unit test for memcpy_mcsafe()
pmem: Switch to copy_to_iter_mcsafe()
dax: Report bytes remaining in dax_iomap_actor()
dax: Introduce a ->copy_to_iter dax operation
uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
xfs, dax: introduce xfs_break_dax_layouts()
xfs: prepare xfs_break_layouts() for another layout type
xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
mm, fs, dax: handle layout changes to pinned dax mappings
mm: fix __gup_device_huge vs unmap
mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
...
Diffstat (limited to 'drivers/nvdimm/pmem.c')
-rw-r--r-- | drivers/nvdimm/pmem.c | 46 |
1 files changed, 36 insertions, 10 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e023d6aa22b5..68940356cad3 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -164,11 +164,6 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, return rc; } -/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */ -#ifndef REQ_FLUSH -#define REQ_FLUSH REQ_PREFLUSH -#endif - static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { blk_status_t rc = 0; @@ -179,7 +174,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct pmem_device *pmem = q->queuedata; struct nd_region *nd_region = to_region(pmem); - if (bio->bi_opf & REQ_FLUSH) + if (bio->bi_opf & REQ_PREFLUSH) nvdimm_flush(nd_region); do_acct = nd_iostat_start(bio, &start); @@ -264,9 +259,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } +static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter_mcsafe(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .copy_from_iter = pmem_copy_from_iter, + .copy_to_iter = pmem_copy_to_iter, }; static const struct attribute_group *pmem_attribute_groups[] = { @@ -294,12 +296,33 @@ static void pmem_release_disk(void *__pmem) put_disk(pmem->disk); } +static void pmem_release_pgmap_ops(void *__pgmap) +{ + dev_pagemap_put_ops(); +} + +static void fsdax_pagefree(struct page *page, void *data) +{ + wake_up_var(&page->_refcount); +} + +static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap) +{ + dev_pagemap_get_ops(); + if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap)) + return -ENOMEM; + pgmap->type = MEMORY_DEVICE_FS_DAX; + pgmap->page_free = fsdax_pagefree; + + return 0; +} + static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); - int nid = dev_to_node(dev), fua, wbc; + int nid = dev_to_node(dev), fua; struct resource *res = &nsio->res; struct resource bb_res; struct nd_pfn *nd_pfn = NULL; @@ -335,7 +358,6 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - wbc = nvdimm_has_cache(nd_region); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -353,6 +375,8 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags = PFN_DEV; pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { + if (setup_pagemap_fsdax(dev, &pmem->pgmap)) + return -ENOMEM; addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); @@ -364,6 +388,8 @@ static int pmem_attach_disk(struct device *dev, } else if (pmem_should_map_pages(dev)) { memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); pmem->pgmap.altmap_valid = false; + if (setup_pagemap_fsdax(dev, &pmem->pgmap)) + return -ENOMEM; addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); @@ -382,7 +408,7 @@ static int pmem_attach_disk(struct device *dev, return PTR_ERR(addr); pmem->virt_addr = addr; - blk_queue_write_cache(q, wbc, fua); + blk_queue_write_cache(q, true, fua); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_logical_block_size(q, pmem_sector_size(ndns)); @@ -413,7 +439,7 @@ static int pmem_attach_disk(struct device *dev, put_disk(disk); return -ENOMEM; } - dax_write_cache(dax_dev, wbc); + dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; gendev = disk_to_dev(disk); |