summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 12:12:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 12:12:56 -0700
commitdf668a5fe461bb9d7e899c538acc7197746038f4 (patch)
tree315a71104f5cea7feeb56c9f2c768453408b72f7 /fs
parentdf04fbe8680bfe07f3d7487eccff9f768bb02533 (diff)
parent2705dfb2094777e405e065105e307074af8965c1 (diff)
downloadlinux-df668a5fe461bb9d7e899c538acc7197746038f4.tar.gz
linux-df668a5fe461bb9d7e899c538acc7197746038f4.tar.bz2
linux-df668a5fe461bb9d7e899c538acc7197746038f4.zip
Merge tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: - disk events cleanup (Christoph) - gendisk and request queue allocation simplifications (Christoph) - bdev_disk_changed cleanups (Christoph) - IO priority improvements (Bart) - Chained bio completion trace fix (Edward) - blk-wbt fixes (Jan) - blk-wbt enable/disable fix (Zhang) - Scheduler dispatch improvements (Jan, Ming) - Shared tagset scheduler improvements (John) - BFQ updates (Paolo, Luca, Pietro) - BFQ lock inversion fix (Jan) - Documentation improvements (Kir) - CLONE_IO block cgroup fix (Tejun) - Remove of ancient and deprecated block dump feature (zhangyi) - Discard merge fix (Ming) - Misc fixes or followup fixes (Colin, Damien, Dan, Long, Max, Thomas, Yang) * tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block: (129 commits) block: fix discard request merge block/mq-deadline: Remove a WARN_ON_ONCE() call blk-mq: update hctx->dispatch_busy in case of real scheduler blk: Fix lock inversion between ioc lock and bfqd lock bfq: Remove merged request already in bfq_requests_merged() block: pass a gendisk to bdev_disk_changed block: move bdev_disk_changed block: add the events* attributes to disk_attrs block: move the disk events code to a separate file block: fix trace completion for chained bio block/partitions/msdos: Fix typo inidicator -> indicator block, bfq: reset waker pointer with shared queues block, bfq: check waker only for queues with no in-flight I/O block, bfq: avoid delayed merge of async queues block, bfq: boost throughput by extending queue-merging times block, bfq: consider also creation time in delayed stable merge block, bfq: fix delayed stable merge check block, bfq: let also stably merged queues enjoy weight raising blk-wbt: make sure throttle is enabled properly blk-wbt: introduce a new disable state to prevent false positive by rwb_enabled() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c244
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/fs-writeback.c25
-rw-r--r--fs/super.c8
4 files changed, 97 insertions, 182 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index eb34f5c357cf..ca8bf1869ca8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
bdev = I_BDEV(inode);
- mutex_init(&bdev->bd_mutex);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
bdev->bd_disk = disk;
@@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
struct bd_holder_disk *holder;
int ret = 0;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
WARN_ON_ONCE(!bdev->bd_holder);
@@ -1199,7 +1198,7 @@ out_del:
out_free:
kfree(holder);
out_unlock:
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
struct bd_holder_disk *holder;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
holder = bd_find_holder_disk(bdev, disk);
@@ -1230,138 +1229,97 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
kfree(holder);
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+ WARN_ON_ONCE(bdev->bd_holders);
+ sync_blockdev(bdev);
+ kill_bdev(bdev);
+ bdev_write_inode(bdev);
+}
-int bdev_disk_changed(struct block_device *bdev, bool invalidate)
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
- lockdep_assert_held(&bdev->bd_mutex);
-
- if (!(disk->flags & GENHD_FL_UP))
- return -ENXIO;
-
-rescan:
- if (bdev->bd_part_count)
- return -EBUSY;
- sync_blockdev(bdev);
- invalidate_bdev(bdev);
- blk_drop_partitions(disk);
-
- clear_bit(GD_NEED_PART_SCAN, &disk->state);
-
- /*
- * Historically we only set the capacity to zero for devices that
- * support partitions (independ of actually having partitions created).
- * Doing that is rather inconsistent, but changing it broke legacy
- * udisks polling for legacy ide-cdrom devices. Use the crude check
- * below to get the sane behavior for most device while not breaking
- * userspace for this particular setup.
- */
- if (invalidate) {
- if (disk_part_scan_enabled(disk) ||
- !(disk->flags & GENHD_FL_REMOVABLE))
- set_capacity(disk, 0);
+ if (disk->fops->open) {
+ ret = disk->fops->open(bdev, mode);
+ if (ret) {
+ /* avoid ghost partitions on a removed medium */
+ if (ret == -ENOMEDIUM &&
+ test_bit(GD_NEED_PART_SCAN, &disk->state))
+ bdev_disk_changed(disk, true);
+ return ret;
+ }
}
- if (get_capacity(disk)) {
- ret = blk_add_partitions(disk, bdev);
- if (ret == -EAGAIN)
- goto rescan;
- } else if (invalidate) {
- /*
- * Tell userspace that the media / partition table may have
- * changed.
- */
- kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+ if (!bdev->bd_openers) {
+ set_init_blocksize(bdev);
+ if (bdev->bd_bdi == &noop_backing_dev_info)
+ bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
}
+ if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+ bdev_disk_changed(disk, false);
+ bdev->bd_openers++;
+ return 0;;
+}
- return ret;
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+ if (!--bdev->bd_openers)
+ blkdev_flush_mapping(bdev);
+ if (bdev->bd_disk->fops->release)
+ bdev->bd_disk->fops->release(bdev->bd_disk, mode);
}
-/*
- * Only exported for loop and dasd for historic reasons. Don't use in new
- * code!
- */
-EXPORT_SYMBOL_GPL(bdev_disk_changed);
-/*
- * bd_mutex locking:
- *
- * mutex_lock(part->bd_mutex)
- * mutex_lock_nested(whole->bd_mutex, 1)
- */
-static int __blkdev_get(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
- struct gendisk *disk = bdev->bd_disk;
- int ret = 0;
+ struct gendisk *disk = part->bd_disk;
+ struct block_device *whole;
+ int ret;
- if (!(disk->flags & GENHD_FL_UP))
- return -ENXIO;
+ if (part->bd_openers)
+ goto done;
- if (!bdev->bd_openers) {
- if (!bdev_is_partition(bdev)) {
- ret = 0;
- if (disk->fops->open)
- ret = disk->fops->open(bdev, mode);
+ whole = bdgrab(disk->part0);
+ ret = blkdev_get_whole(whole, mode);
+ if (ret)
+ goto out_put_whole;
- if (!ret)
- set_init_blocksize(bdev);
+ ret = -ENXIO;
+ if (!bdev_nr_sectors(part))
+ goto out_blkdev_put;
- /*
- * If the device is invalidated, rescan partition
- * if open succeeded or failed with -ENOMEDIUM.
- * The latter is necessary to prevent ghost
- * partitions on a removed medium.
- */
- if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
- (!ret || ret == -ENOMEDIUM))
- bdev_disk_changed(bdev, ret == -ENOMEDIUM);
+ disk->open_partitions++;
+ set_init_blocksize(part);
+ if (part->bd_bdi == &noop_backing_dev_info)
+ part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
+done:
+ part->bd_openers++;
+ return 0;
- if (ret)
- return ret;
- } else {
- struct block_device *whole = bdgrab(disk->part0);
-
- mutex_lock_nested(&whole->bd_mutex, 1);
- ret = __blkdev_get(whole, mode);
- if (ret) {
- mutex_unlock(&whole->bd_mutex);
- bdput(whole);
- return ret;
- }
- whole->bd_part_count++;
- mutex_unlock(&whole->bd_mutex);
+out_blkdev_put:
+ blkdev_put_whole(whole, mode);
+out_put_whole:
+ bdput(whole);
+ return ret;
+}
- if (!bdev_nr_sectors(bdev)) {
- __blkdev_put(whole, mode, 1);
- bdput(whole);
- return -ENXIO;
- }
- set_init_blocksize(bdev);
- }
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+ struct block_device *whole = bdev_whole(part);
- if (bdev->bd_bdi == &noop_backing_dev_info)
- bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
- } else {
- if (!bdev_is_partition(bdev)) {
- if (bdev->bd_disk->fops->open)
- ret = bdev->bd_disk->fops->open(bdev, mode);
- /* the same as first opener case, read comment there */
- if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
- (!ret || ret == -ENOMEDIUM))
- bdev_disk_changed(bdev, ret == -ENOMEDIUM);
- if (ret)
- return ret;
- }
- }
- bdev->bd_openers++;
- return 0;
+ if (--part->bd_openers)
+ return;
+ blkdev_flush_mapping(part);
+ whole->bd_disk->open_partitions--;
+ blkdev_put_whole(whole, mode);
+ bdput(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1447,8 +1405,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
disk_block_events(disk);
- mutex_lock(&bdev->bd_mutex);
- ret =__blkdev_get(bdev, mode);
+ mutex_lock(&disk->open_mutex);
+ ret = -ENXIO;
+ if (!(disk->flags & GENHD_FL_UP))
+ goto abort_claiming;
+ if (bdev_is_partition(bdev))
+ ret = blkdev_get_part(bdev, mode);
+ else
+ ret = blkdev_get_whole(bdev, mode);
if (ret)
goto abort_claiming;
if (mode & FMODE_EXCL) {
@@ -1467,7 +1431,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
unblock_events = false;
}
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
if (unblock_events)
disk_unblock_events(disk);
@@ -1476,7 +1440,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
abort_claiming:
if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
@@ -1551,10 +1515,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return 0;
}
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
+void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
- struct block_device *victim = NULL;
/*
* Sync early if it looks like we're the last one. If someone else
@@ -1566,41 +1529,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_openers == 1)
sync_blockdev(bdev);
- mutex_lock_nested(&bdev->bd_mutex, for_part);
- if (for_part)
- bdev->bd_part_count--;
-
- if (!--bdev->bd_openers) {
- WARN_ON_ONCE(bdev->bd_holders);
- sync_blockdev(bdev);
- kill_bdev(bdev);
- bdev_write_inode(bdev);
- if (bdev_is_partition(bdev))
- victim = bdev_whole(bdev);
- }
-
- if (!bdev_is_partition(bdev) && disk->fops->release)
- disk->fops->release(disk, mode);
- mutex_unlock(&bdev->bd_mutex);
- if (victim) {
- __blkdev_put(victim, mode, 1);
- bdput(victim);
- }
-}
-
-void blkdev_put(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
-
- mutex_lock(&bdev->bd_mutex);
-
+ mutex_lock(&disk->open_mutex);
if (mode & FMODE_EXCL) {
struct block_device *whole = bdev_whole(bdev);
bool bdev_free;
/*
* Release a claim on the device. The holder fields
- * are protected with bdev_lock. bd_mutex is to
+ * are protected with bdev_lock. open_mutex is to
* synchronize disk_holder unlinking.
*/
spin_lock(&bdev_lock);
@@ -1631,9 +1567,13 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* from userland - e.g. eject(1).
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
- mutex_unlock(&bdev->bd_mutex);
- __blkdev_put(bdev, mode, 0);
+ if (bdev_is_partition(bdev))
+ blkdev_put_part(bdev, mode);
+ else
+ blkdev_put_whole(bdev, mode);
+ mutex_unlock(&disk->open_mutex);
+
blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@@ -1941,10 +1881,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
old_inode = inode;
bdev = I_BDEV(inode);
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers)
func(bdev, arg);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 782e16795bc4..807502cd6510 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
lockdep_assert_held(&uuid_mutex);
/*
* The device_list_mutex cannot be taken here in case opening the
- * underlying device takes further locks like bd_mutex.
+ * underlying device takes further locks like open_mutex.
*
* We also don't need the lock here as this is called during mount and
* exclusion is provided by uuid_mutex
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 62193106683d..8c7e9e51a398 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2343,28 +2343,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
return ret;
}
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
-{
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
- struct dentry *dentry;
- const char *name = "?";
-
- dentry = d_find_alias(inode);
- if (dentry) {
- spin_lock(&dentry->d_lock);
- name = (const char *) dentry->d_name.name;
- }
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- if (dentry) {
- spin_unlock(&dentry->d_lock);
- dput(dentry);
- }
- }
-}
-
/**
* __mark_inode_dirty - internal function to mark an inode dirty
*
@@ -2434,9 +2412,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
- if (unlikely(block_dump))
- block_dump___mark_inode_dirty(inode);
-
spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
diff --git a/fs/super.c b/fs/super.c
index 11b7e7213fd1..91b7f156735b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/