drbd: fix resync finished detection

This fixes one recent regresion, and one long existing bug. The bug: drbd_try_clear_on_disk_bm() assumed that all "count" bits have to be accounted in the resync extent corresponding to the start sector. Since we allow application requests to cross our "extent" boundaries, this assumption is no longer true, resulting in possible misaccounting, scary messages ("BAD! sector=12345s enr=6 rs_left=-7 rs_failed=0 count=58 cstate=..."), and potentially, if the last bit to be cleared during resync would reside in previously misaccounted resync extent, the resync would never be recognized as finished, but would be "stalled" forever, even though all blocks are in sync again and all bits have been cleared... The regression was introduced by drbd: get rid of atomic update on disk bitmap works For an "empty" resync (rs_total == 0), we must not "finish" the resync on the SyncSource before the SyncTarget knows all relevant information (sync uuid). We need to wait for the full round-trip, the SyncTarget will then explicitly notify us. Also for normal, non-empty resyncs (rs_total > 0), the resync-finished condition needs to be tested before the schedule() in wait_for_work, or it is likely to be missed. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2014-01-27 15:58:22 +0100
committer: Philipp Reisner <philipp.reisner@linbit.com> 2014-07-10 18:34:50 +0200
commit: 5ab7d2c005135849cf0bb1485d954c98f2cca57c (patch)
tree: 43340069d199c864871c04f9672639415a7bb8fa /drivers/block/drbd/drbd_worker.c
parent: a80ca1ae81fc52e304e753f6de4ef248df364f9e (diff)
download: linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.tar.gz
linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.tar.bz2
linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.zip
1 files changed, 22 insertions, 20 deletions
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 47bc84017b5b..bafb62eb22c9 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1740,11 +1740,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 			device->rs_mark_time[i] = now;
 		}
 		_drbd_pause_after(device);
+		/* Forget potentially stale cached per resync extent bit-counts.
+		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
+		 * disabled, and know the disk state is ok. */
+		spin_lock(&device->al_lock);
+		lc_reset(device->resync);
+		device->resync_locked = 0;
+		device->resync_wenr = LC_FREE;
+		spin_unlock(&device->al_lock);
 	}
 	write_unlock(&global_state_lock);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	if (r == SS_SUCCESS) {
+		wake_up(&device->al_wait); /* for lc_reset() above */
 		/* reset rs_last_bcast when a resync or verify is started,
 		 * to deal with potential jiffies wrap. */
 		device->rs_last_bcast = jiffies - HZ;
@@ -1807,36 +1816,22 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 static void update_on_disk_bitmap(struct drbd_device *device)
 {
 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
+	bool resync_done = test_and_clear_bit(RS_DONE, &device->flags);
 	device->rs_last_bcast = jiffies;
 
 	if (!get_ldev(device))
 		return;
 
 	drbd_bm_write_lazy(device, 0);
-	if (drbd_bm_total_weight(device) <= device->rs_failed)
+	if (resync_done && is_sync_state(device->state.conn))
 		drbd_resync_finished(device);
+
 	drbd_bcast_event(device, &sib);
 	/* update timestamp, in case it took a while to write out stuff */
 	device->rs_last_bcast = jiffies;
 	put_ldev(device);
 }
 
-bool wants_lazy_bitmap_update(struct drbd_device *device)
-{
-	enum drbd_conns connection_state = device->state.conn;
-	return
-	/* only do a lazy writeout, if device is in some resync state */
-	   (connection_state == C_SYNC_SOURCE
-	||  connection_state == C_SYNC_TARGET
-	||  connection_state == C_PAUSED_SYNC_S
-	||  connection_state == C_PAUSED_SYNC_T) &&
-	/* AND
-	 * either we just finished, or the last lazy update
-	 * was some time ago already. */
-	   (drbd_bm_total_weight(device) <= device->rs_failed
-	||  time_after(jiffies, device->rs_last_bcast + 2*HZ));
-}
-
 static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection)
 {
 	struct drbd_peer_device *peer_device;
@@ -1845,8 +1840,9 @@ static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection)
 	rcu_read_lock();
 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 		struct drbd_device *device = peer_device->device;
-		if (!wants_lazy_bitmap_update(device))
+		if (!test_and_clear_bit(RS_PROGRESS, &device->flags))
 			continue;
+
 		kref_get(&device->kref);
 		rcu_read_unlock();
 		update_on_disk_bitmap(device);
@@ -1930,15 +1926,18 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 		if (send_barrier)
 			maybe_send_barrier(connection,
 					connection->send.current_epoch_nr + 1);
+
+		if (test_bit(CONN_RS_PROGRESS, &connection->flags))
+			break;
+
 		/* drbd_send() may have called flush_signals() */
 		if (get_t_state(&connection->worker) != RUNNING)
 			break;
+
 		schedule();
 		/* may be woken up for other things but new work, too,
 		 * e.g. if the current epoch got closed.
 		 * In which case we send the barrier above. */
-
-		try_update_all_on_disk_bitmaps(connection);
 	}
 	finish_wait(&connection->sender_work.q_wait, &wait);
 
@@ -1973,6 +1972,9 @@ int drbd_worker(struct drbd_thread *thi)
 		if (list_empty(&work_list))
 			wait_for_work(connection, &work_list);
 
+		if (test_and_clear_bit(CONN_RS_PROGRESS, &connection->flags))
+			try_update_all_on_disk_bitmaps(connection);
+
 		if (signal_pending(current)) {
 			flush_signals(current);
 			if (get_t_state(thi) == RUNNING) {
author	Lars Ellenberg <lars.ellenberg@linbit.com>	2014-01-27 15:58:22 +0100
committer	Philipp Reisner <philipp.reisner@linbit.com>	2014-07-10 18:34:50 +0200
commit	5ab7d2c005135849cf0bb1485d954c98f2cca57c (patch)
tree	43340069d199c864871c04f9672639415a7bb8fa /drivers/block/drbd/drbd_worker.c
parent	a80ca1ae81fc52e304e753f6de4ef248df364f9e (diff)
download	linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.tar.gz linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.tar.bz2 linux-stable-5ab7d2c005135849cf0bb1485d954c98f2cca57c.zip