summary refs log tree commit diff stats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-03-11 11:43:44 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-03-11 11:43:44 -0700
commit 1ddeeb2a058d7b2a58ed9e820396b4ceb715d529 (patch)
tree 32a27b8eb1c538239b641292d77dc1a8cee8ee97 /drivers/block/drbd
parent d2c84bdce25a678c1e1f116d65b58790bd241af0 (diff)
parent 5205a4aa8fc9454853b705b69611c80e9c644283 (diff)
download linux-1ddeeb2a058d7b2a58ed9e820396b4ceb715d529.tar.gz
linux-1ddeeb2a058d7b2a58ed9e820396b4ceb715d529.tar.bz2
linux-1ddeeb2a058d7b2a58ed9e820396b4ceb715d529.zip
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - MD pull requests via Song: - Cleanup redundant checks (Yu Kuai) - Remove deprecated headers (Marc Zyngier, Song Liu) - Concurrency fixes (Li Lingfeng) - Memory leak fix (Li Nan) - Refactor raid1 read_balance (Yu Kuai, Paul Luse) - Clean up and fix for md_ioctl (Li Nan) - Other small fixes (Gui-Dong Han, Heming Zhao) - MD atomic limits (Christoph) - NVMe pull request via Keith: - RDMA target enhancements (Max) - Fabrics fixes (Max, Guixin, Hannes) - Atomic queue_limits usage (Christoph) - Const use for class_register (Ricardo) - Identification error handling fixes (Shin'ichiro, Keith) - Improvement and cleanup for cached request handling (Christoph) - Moving towards atomic queue limits. Core changes and driver bits so far (Christoph) - Fix UAF issues in aoeblk (Chun-Yi) - Zoned fix and cleanups (Damien) - s390 dasd cleanups and fixes (Jan, Miroslav) - Block issue timestamp caching (me) - noio scope guarding for zoned IO (Johannes) - block/nvme PI improvements (Kanchan) - Ability to terminate long running discard loop (Keith) - bdev revalidation fix (Li) - Get rid of old nr_queues hack for kdump kernels (Ming) - Support for async deletion of ublk (Ming) - Improve IRQ bio recycling (Pavel) - Factor in CPU capacity for remote vs local completion (Qais) - Add shared_tags configfs entry for null_blk (Shin'ichiro) - Fix for a regression in page refcounts introduced by the folio unification (Tony) - Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid, Ricardo, Roman, Tang, Uwe) * tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits) block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC block/swim: Convert to platform remove callback returning void cdrom: gdrom: Convert to platform remove callback returning void block: remove disk_stack_limits md: remove mddev->queue md: don't initialize queue limits md/raid10: use the atomic queue limit update APIs md/raid5: use the atomic queue limit 
update APIs md/raid1: use the atomic queue limit update APIs md/raid0: use the atomic queue limit update APIs md: add queue limit helpers md: add a mddev_is_dm helper md: add a mddev_add_trace_msg helper md: add a mddev_trace_remap helper bcache: move calculation of stripe_size and io_opt into bcache_device_init virtio_blk: Do not use disk_set_max_open/active_zones() aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts block: move capacity validation to blkpg_do_ioctl() block: prevent division by zero in blk_rq_stat_sum() drbd: atomically update queue limits in drbd_reconsider_queue_parameters ...
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- drivers/block/drbd/drbd_main.c 17
-rw-r--r-- drivers/block/drbd/drbd_nl.c 210
-rw-r--r-- drivers/block/drbd/drbd_state.c 24
-rw-r--r-- drivers/block/drbd/drbd_state_change.h 8
4 files changed, 131 insertions, 128 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6bc86106c7b2..113b441d4d36 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
int id;
int vnr = adm_ctx->volume;
enum drbd_ret_code err = ERR_NOMEM;
+ struct queue_limits lim = {
+ /*
+ * Setting the max_hw_sectors to an odd value of 8kibyte here.
+ * This triggers a max_bio_size message upon first attach or
+ * connect.
+ */
+ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
+ };
device = minor_to_device(minor);
if (device)
@@ -2708,9 +2716,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
goto out_no_disk;
+ }
device->vdisk = disk;
device->rq_queue = disk->queue;
@@ -2727,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
blk_queue_write_cache(disk->queue, true, true);
- /* Setting the max_hw_sectors to an odd value of 8kibyte here
- This triggers a max_bio_size message upon first attach or connect */
- blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8);
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6aed67278e8b..5d65c9754d83 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
return 0;
}
-static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
+static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
{
- q->limits.discard_granularity = granularity;
+ /*
+ * We may ignore peer limits if the peer is modern enough. From 8.3.8
+ * onwards the peer can use multiple BIOs for a single peer_request.
+ */
+ if (device->state.conn < C_WF_REPORT_PARAMS)
+ return device->peer_max_bio_size;
+
+ if (first_peer_device(device)->connection->agreed_pro_version < 94)
+ return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+
+ /*
+ * Correct old drbd (up to 8.3.7) if it believes it can do more than
+ * 32KiB.
+ */
+ if (first_peer_device(device)->connection->agreed_pro_version == 94)
+ return DRBD_MAX_SIZE_H80_PACKET;
+
+ /*
+ * drbd 8.3.8 onwards, before 8.4.0
+ */
+ if (first_peer_device(device)->connection->agreed_pro_version < 100)
+ return DRBD_MAX_BIO_SIZE_P95;
+ return DRBD_MAX_BIO_SIZE;
}
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
@@ -1204,149 +1226,119 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
return AL_EXTENT_SIZE >> 9;
}
-static void decide_on_discard_support(struct drbd_device *device,
+static bool drbd_discard_supported(struct drbd_connection *connection,
struct drbd_backing_dev *bdev)
{
- struct drbd_connection *connection =
- first_peer_device(device)->connection;
- struct request_queue *q = device->rq_queue;
- unsigned int max_discard_sectors;
-
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
- goto not_supported;
+ return false;
if (connection->cstate >= C_CONNECTED &&
!(connection->agreed_features & DRBD_FF_TRIM)) {
drbd_info(connection,
"peer DRBD too old, does not support TRIM: disabling discards\n");
- goto not_supported;
+ return false;
}
- /*
- * We don't care for the granularity, really.
- *
- * Stacking limits below should fix it for the local device. Whether or
- * not it is a suitable granularity on the remote device is not our
- * problem, really. If you care, you need to use devices with similar
- * topology on all peers.
- */
- blk_queue_discard_granularity(q, 512);
- max_discard_sectors = drbd_max_discard_sectors(connection);
- blk_queue_max_discard_sectors(q, max_discard_sectors);
- blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- return;
-
-not_supported:
- blk_queue_discard_granularity(q, 0);
- blk_queue_max_discard_sectors(q, 0);
+ return true;
}
-static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
+/* This is the workaround for "bio would need to, but cannot, be split" */
+static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
{
- /* Fixup max_write_zeroes_sectors after blk_stack_limits():
- * if we can handle "zeroes" efficiently on the protocol,
- * we want to do that, even if our backend does not announce
- * max_write_zeroes_sectors itself. */
- struct drbd_connection *connection = first_peer_device(device)->connection;
- /* If the peer announces WZEROES support, use it. Otherwise, rather
- * send explicit zeroes than rely on some discard-zeroes-data magic. */
- if (connection->agreed_features & DRBD_FF_WZEROES)
- q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
- else
- q->limits.max_write_zeroes_sectors = 0;
-}
+ unsigned int max_segments;
-static void fixup_discard_support(struct drbd_device *device, struct request_queue *q)
-{
- unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
- unsigned int discard_granularity =
- device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
+ rcu_read_lock();
+ max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
+ rcu_read_unlock();
- if (discard_granularity > max_discard) {
- blk_queue_discard_granularity(q, 0);
- blk_queue_max_discard_sectors(q, 0);
- }
+ if (!max_segments)
+ return BLK_MAX_SEGMENTS;
+ return max_segments;
}
-static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
- unsigned int max_bio_size, struct o_qlim *o)
+void drbd_reconsider_queue_parameters(struct drbd_device *device,
+ struct drbd_backing_dev *bdev, struct o_qlim *o)
{
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
struct request_queue * const q = device->rq_queue;
- unsigned int max_hw_sectors = max_bio_size >> 9;
- unsigned int max_segments = 0;
+ unsigned int now = queue_max_hw_sectors(q) << 9;
+ struct queue_limits lim;
struct request_queue *b = NULL;
- struct disk_conf *dc;
+ unsigned int new;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
- max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
- rcu_read_lock();
- dc = rcu_dereference(device->ldev->disk_conf);
- max_segments = dc->max_bio_bvecs;
- rcu_read_unlock();
-
- blk_set_stacking_limits(&q->limits);
+ device->local_max_bio_size =
+ queue_max_hw_sectors(b) << SECTOR_SHIFT;
}
- blk_queue_max_hw_sectors(q, max_hw_sectors);
- /* This is the workaround for "bio would need to, but cannot, be split" */
- blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
- blk_queue_segment_boundary(q, PAGE_SIZE-1);
- decide_on_discard_support(device, bdev);
-
- if (b) {
- blk_stack_limits(&q->limits, &b->limits, 0);
- disk_update_readahead(device->vdisk);
+ /*
+ * We may later detach and re-attach on a disconnected Primary. Avoid
+ * decreasing the value in this case.
+ *
+ * We want to store what we know the peer DRBD can handle, not what the
+ * peer IO backend can handle.
+ */
+ new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
+ max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
+ if (new != now) {
+ if (device->state.role == R_PRIMARY && new < now)
+ drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
+ new, now);
+ drbd_info(device, "max BIO size = %u\n", new);
}
- fixup_write_zeroes(device, q);
- fixup_discard_support(device, q);
-}
-
-void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
-{
- unsigned int now, new, local, peer;
-
- now = queue_max_hw_sectors(device->rq_queue) << 9;
- local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
- peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
+ lim = queue_limits_start_update(q);
if (bdev) {
- local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
- device->local_max_bio_size = local;
+ blk_set_stacking_limits(&lim);
+ lim.max_segments = drbd_backing_dev_max_segments(device);
+ } else {
+ lim.max_segments = BLK_MAX_SEGMENTS;
}
- local = min(local, DRBD_MAX_BIO_SIZE);
- /* We may ignore peer limits if the peer is modern enough.
- Because new from 8.3.8 onwards the peer can use multiple
- BIOs for a single peer_request */
- if (device->state.conn >= C_WF_REPORT_PARAMS) {
- if (first_peer_device(device)->connection->agreed_pro_version < 94)
- peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
- /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
- else if (first_peer_device(device)->connection->agreed_pro_version == 94)
- peer = DRBD_MAX_SIZE_H80_PACKET;
- else if (first_peer_device(device)->connection->agreed_pro_version < 100)
- peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
- else
- peer = DRBD_MAX_BIO_SIZE;
+ lim.max_hw_sectors = new >> SECTOR_SHIFT;
+ lim.seg_boundary_mask = PAGE_SIZE - 1;
- /* We may later detach and re-attach on a disconnected Primary.
- * Avoid this setting to jump back in that case.
- * We want to store what we know the peer DRBD can handle,
- * not what the peer IO backend can handle. */
- if (peer > device->peer_max_bio_size)
- device->peer_max_bio_size = peer;
+ /*
+ * We don't care for the granularity, really.
+ *
+ * Stacking limits below should fix it for the local device. Whether or
+ * not it is a suitable granularity on the remote device is not our
+ * problem, really. If you care, you need to use devices with similar
+ * topology on all peers.
+ */
+ if (drbd_discard_supported(connection, bdev)) {
+ lim.discard_granularity = 512;
+ lim.max_hw_discard_sectors =
+ drbd_max_discard_sectors(connection);
+ } else {
+ lim.discard_granularity = 0;
+ lim.max_hw_discard_sectors = 0;
}
- new = min(local, peer);
- if (device->state.role == R_PRIMARY && new < now)
- drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
+ if (bdev)
+ blk_stack_limits(&lim, &b->limits, 0);
- if (new != now)
- drbd_info(device, "max BIO size = %u\n", new);
+ /*
+ * If we can handle "zeroes" efficiently on the protocol, we want to do
+ * that, even if our backend does not announce max_write_zeroes_sectors
+ * itself.
+ */
+ if (connection->agreed_features & DRBD_FF_WZEROES)
+ lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
+ else
+ lim.max_write_zeroes_sectors = 0;
+
+ if ((lim.discard_granularity >> SECTOR_SHIFT) >
+ lim.max_hw_discard_sectors) {
+ lim.discard_granularity = 0;
+ lim.max_hw_discard_sectors = 0;
+ }
- drbd_setup_queue_param(device, bdev, new, o);
+ if (queue_limits_commit_update(q, &lim))
+ drbd_err(device, "setting new queue limits failed\n");
}
/* Starts the worker thread */
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 287a8d1d3f70..e858e7e0383f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
int notify_resource_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_resource_state_change *resource_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_resource_state_change *resource_state_change = state_change;
struct drbd_resource *resource = resource_state_change->resource;
struct resource_info resource_info = {
.res_role = resource_state_change->role[NEW],
@@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb,
int notify_connection_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_connection_state_change *connection_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
- struct drbd_connection *connection = connection_state_change->connection;
+ struct drbd_connection_state_change *p = state_change;
+ struct drbd_connection *connection = p->connection;
struct connection_info connection_info = {
- .conn_connection_state = connection_state_change->cstate[NEW],
- .conn_role = connection_state_change->peer_role[NEW],
+ .conn_connection_state = p->cstate[NEW],
+ .conn_role = p->peer_role[NEW],
};
return notify_connection_state(skb, seq, connection, &connection_info, type);
@@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb,
int notify_device_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_device_state_change *device_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_device_state_change *device_state_change = state_change;
struct drbd_device *device = device_state_change->device;
struct device_info device_info = {
.dev_disk_state = device_state_change->disk_state[NEW],
@@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb,
int notify_peer_device_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_peer_device_state_change *p,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_peer_device_state_change *p = state_change;
struct drbd_peer_device *peer_device = p->peer_device;
struct peer_device_info peer_device_info = {
.peer_repl_state = p->repl_state[NEW],
@@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
bool resource_state_has_changed;
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
- int (*last_func)(struct sk_buff *, unsigned int, void *,
- enum drbd_notification_type) = NULL;
+ int (*last_func)(struct sk_buff *, unsigned int,
+ void *, enum drbd_notification_type) = NULL;
void *last_arg = NULL;
#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
@@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
})
#define REMEMBER_STATE_CHANGE(func, arg, type) \
({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
- last_func = (typeof(last_func))func; \
+ last_func = func; \
last_arg = arg; \
})
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index 9d78d8e3912e..a56a57d67686 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *);
extern int notify_resource_state_change(struct sk_buff *,
unsigned int,
- struct drbd_resource_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_connection_state_change(struct sk_buff *,
unsigned int,
- struct drbd_connection_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_device_state_change(struct sk_buff *,
unsigned int,
- struct drbd_device_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_peer_device_state_change(struct sk_buff *,
unsigned int,
- struct drbd_peer_device_state_change *,
+ void *,
enum drbd_notification_type type);
#endif /* DRBD_STATE_CHANGE_H */