summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-25 09:15:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-25 09:15:35 -0700
commit929cfdd5d3bdc772aff32e5a3fb4e3894394aa75 (patch)
treef67202d079eaf1f8d65b2e1bfac70b768ae34bc4 /drivers/block/drbd
parent798ce8f1cca29dcc3f4b55947f611f4ffb32ac2b (diff)
parenta1c15c59feee36267c43142a41152fbf7402afb6 (diff)
downloadlinux-929cfdd5d3bdc772aff32e5a3fb4e3894394aa75.tar.gz
linux-929cfdd5d3bdc772aff32e5a3fb4e3894394aa75.tar.bz2
linux-929cfdd5d3bdc772aff32e5a3fb4e3894394aa75.zip
Merge branch 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block: (110 commits) loop: handle on-demand devices correctly loop: limit 'max_part' module param to DISK_MAX_PARTS drbd: fix warning drbd: fix warning drbd: Fix spelling drbd: fix schedule in atomic drbd: Take a more conservative approach when deciding max_bio_size drbd: Fixed state transitions after async outdate-peer-handler returned drbd: Disallow the peer_disk_state to be D_OUTDATED while connected drbd: Fix for the connection problems on high latency links drbd: fix potential activity log refcount imbalance in error path drbd: Only downgrade the disk state in case of disk failures drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int drbd: fix potential distributed deadlock lru_cache.h: fix comments referring to ts_ instead of lc_ drbd: Fix for application IO with the on-io-error=pass-on policy xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions. xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override. xen/blkback: don't fail empty barrier requests xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end() ...
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c6
-rw-r--r--drivers/block/drbd/drbd_int.h19
-rw-r--r--drivers/block/drbd/drbd_main.c37
-rw-r--r--drivers/block/drbd/drbd_nl.c127
-rw-r--r--drivers/block/drbd/drbd_receiver.c68
-rw-r--r--drivers/block/drbd/drbd_req.c20
-rw-r--r--drivers/block/drbd/drbd_req.h5
-rw-r--r--drivers/block/drbd/drbd_worker.c98
9 files changed, 243 insertions, 139 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index c6828b68d77b..09ef9a878ef0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -28,7 +28,7 @@
#include "drbd_int.h"
#include "drbd_wrappers.h"
-/* We maintain a trivial check sum in our on disk activity log.
+/* We maintain a trivial checksum in our on disk activity log.
* With that we can ensure correct operation even when the storage
* device might do a partial (last) sector write while losing power.
*/
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 76210ba401ac..f440a02dfdb1 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -74,7 +74,7 @@
* as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
* seems excessive.
*
- * We plan to reduce the amount of in-core bitmap pages by pageing them in
+ * We plan to reduce the amount of in-core bitmap pages by paging them in
* and out against their on-disk location as necessary, but need to make
* sure we don't cause too much meta data IO, and must not deadlock in
* tight memory situations. This needs some more work.
@@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
* we if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT 28
-/* store_page_idx uses non-atomic assingment. It is only used directly after
+/* store_page_idx uses non-atomic assignment. It is only used directly after
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
* use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
* changes) may happen from various contexts, and wait_on_bit/wake_up_bit
@@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr)
/* word offset from start of bitmap to word number _in_page_
* modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
- hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
so do it explicitly:
*/
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d871b14ed5a1..ef2ceed3be4b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -700,7 +700,7 @@ struct drbd_request {
* see drbd_endio_pri(). */
struct bio *private_bio;
- struct hlist_node colision;
+ struct hlist_node collision;
sector_t sector;
unsigned int size;
unsigned int epoch; /* barrier_nr */
@@ -766,7 +766,7 @@ struct digest_info {
struct drbd_epoch_entry {
struct drbd_work w;
- struct hlist_node colision;
+ struct hlist_node collision;
struct drbd_epoch *epoch; /* for writes */
struct drbd_conf *mdev;
struct page *pages;
@@ -1129,6 +1129,8 @@ struct drbd_conf {
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
int rs_planed; /* resync sectors already planned */
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
+ int peer_max_bio_size;
+ int local_max_bio_size;
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1218,8 +1220,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
unsigned int set_size);
extern void tl_clear(struct drbd_conf *mdev);
-enum drbd_req_event;
-extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
extern void drbd_free_sock(struct drbd_conf *mdev);
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1434,6 +1434,7 @@ struct bm_extent {
* hash table. */
#define HT_SHIFT 8
#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
@@ -1518,9 +1519,9 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
extern char *ppsize(char *buf, unsigned long long size);
extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
extern void resync_after_online_grow(struct drbd_conf *);
-extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
+extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
enum drbd_role new_role,
int force);
@@ -1828,6 +1829,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
if (!forcedetach) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
+ if (mdev->state.disk > D_INCONSISTENT)
+ _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
break;
}
/* NOTE fall through to detach case if forcedetach set */
@@ -2153,6 +2156,10 @@ static inline int get_net_conf(struct drbd_conf *mdev)
static inline void put_ldev(struct drbd_conf *mdev)
{
int i = atomic_dec_return(&mdev->local_cnt);
+
+ /* This may be called from some endio handler,
+ * so we must not sleep here. */
+
__release(local);
D_ASSERT(i >= 0);
if (i == 0) {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5b525c179f39..0358e55356c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
mdev->agreed_pro_version < 88)
rv = SS_NOT_SUPPORTED;
+ else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+ rv = SS_CONNECTED_OUTDATES;
+
return rv;
}
@@ -1565,6 +1568,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
put_ldev(mdev);
}
+ /* Notify peer that I had a local IO error, and did not detached.. */
+ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
+ drbd_send_state(mdev);
+
/* Disks got bigger while they were detached */
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
@@ -2064,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
{
struct p_sizes p;
sector_t d_size, u_size;
- int q_order_type;
+ int q_order_type, max_bio_size;
int ok;
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -2072,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
d_size = drbd_get_max_capacity(mdev->ldev);
u_size = mdev->ldev->dc.disk_size;
q_order_type = drbd_queue_order_type(mdev);
+ max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+ max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
put_ldev(mdev);
} else {
d_size = 0;
u_size = 0;
q_order_type = QUEUE_ORDERED_NONE;
+ max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
}
p.d_size = cpu_to_be64(d_size);
p.u_size = cpu_to_be64(u_size);
p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
- p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
+ p.max_bio_size = cpu_to_be32(max_bio_size);
p.queue_order_type = cpu_to_be16(q_order_type);
p.dds_flags = cpu_to_be16(flags);
@@ -2722,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
/* double check digest, sometimes buffers have been modified in flight. */
if (dgs > 0 && dgs <= 64) {
- /* 64 byte, 512 bit, is the larges digest size
+ /* 64 byte, 512 bit, is the largest digest size
* currently supported in kernel crypto. */
unsigned char digest[64];
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
@@ -3041,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
mdev->agreed_pro_version = PRO_VERSION_MAX;
mdev->write_ordering = WO_bdev_flush;
mdev->resync_wenr = LC_FREE;
+ mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
+ mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
}
void drbd_mdev_cleanup(struct drbd_conf *mdev)
@@ -3275,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor)
drbd_release_ee_lists(mdev);
- /* should be free'd on disconnect? */
+ /* should be freed on disconnect? */
kfree(mdev->ee_hash);
/*
mdev->ee_hash_s = 0;
@@ -3415,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
q->backing_dev_info.congested_data = mdev;
blk_queue_make_request(q, drbd_make_request);
- blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
+ /* Setting the max_hw_sectors to an odd value of 8kibyte here
+ This triggers a max_bio_size message upon first attach or connect */
+ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
blk_queue_merge_bvec(q, drbd_merge_bvec);
q->queue_lock = &mdev->req_lock;
@@ -3627,7 +3641,8 @@ struct meta_data_on_disk {
/* `-- act_log->nr_elements <-- sync_conf.al_extents */
u32 bm_offset; /* offset to the bitmap, from here */
u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
- u32 reserved_u32[4];
+ u32 la_peer_max_bio_size; /* last peer max_bio_size */
+ u32 reserved_u32[3];
} __packed;
@@ -3668,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+ buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
sector = mdev->ldev->md.md_offset;
@@ -3751,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn < C_CONNECTED) {
+ int peer;
+ peer = be32_to_cpu(buffer->la_peer_max_bio_size);
+ peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+ mdev->peer_max_bio_size = peer;
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
if (mdev->sync_conf.al_extents < 7)
mdev->sync_conf.al_extents = 127;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 03b29f78a37d..515bcd948a43 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -272,9 +272,28 @@ static int _try_outdate_peer_async(void *data)
{
struct drbd_conf *mdev = (struct drbd_conf *)data;
enum drbd_disk_state nps;
+ union drbd_state ns;
nps = drbd_try_outdate_peer(mdev);
- drbd_request_state(mdev, NS(pdsk, nps));
+
+ /* Not using
+ drbd_request_state(mdev, NS(pdsk, nps));
+ here, because we might were able to re-establish the connection
+ in the meantime. This can only partially be solved in the state's
+ engine is_valid_state() and is_valid_state_transition()
+ functions.
+
+ nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+ pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+ therefore we have to have the pre state change check here.
+ */
+ spin_lock_irq(&mdev->req_lock);
+ ns = mdev->state;
+ if (ns.conn < C_WF_REPORT_PARAMS) {
+ ns.pdsk = nps;
+ _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ }
+ spin_unlock_irq(&mdev->req_lock);
return 0;
}
@@ -577,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
* Returns 0 on success, negative return values indicate errors.
* You should call drbd_md_sync() after calling this function.
*/
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
{
sector_t prev_first_sect, prev_size; /* previous meta location */
sector_t la_size;
@@ -773,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
return 0;
}
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
struct request_queue * const q = mdev->rq_queue;
- struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
- int max_segments = mdev->ldev->dc.max_bio_bvecs;
- int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+ int max_hw_sectors = max_bio_size >> 9;
+ int max_segments = 0;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+ max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+ max_segments = mdev->ldev->dc.max_bio_bvecs;
+ put_ldev(mdev);
+ }
blk_queue_logical_block_size(q, 512);
blk_queue_max_hw_sectors(q, max_hw_sectors);
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
- blk_queue_stack_limits(q, b);
- dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+ blk_queue_stack_limits(q, b);
- if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
- dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
- q->backing_dev_info.ra_pages,
- b->backing_dev_info.ra_pages);
- q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+ dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+ q->backing_dev_info.ra_pages,
+ b->backing_dev_info.ra_pages);
+ q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ }
+ put_ldev(mdev);
}
}
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+ int now, new, local, peer;
+
+ now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+ local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
+ peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+ mdev->local_max_bio_size = local;
+ put_ldev(mdev);
+ }
+
+ /* We may ignore peer limits if the peer is modern enough.
+ Because new from 8.3.8 onwards the peer can use multiple
+ BIOs for a single peer_request */
+ if (mdev->state.conn >= C_CONNECTED) {
+ if (mdev->agreed_pro_version < 94)
+ peer = mdev->peer_max_bio_size;
+ else if (mdev->agreed_pro_version == 94)
+ peer = DRBD_MAX_SIZE_H80_PACKET;
+ else /* drbd 8.3.8 onwards */
+ peer = DRBD_MAX_BIO_SIZE;
+ }
+
+ new = min_t(int, local, peer);
+
+ if (mdev->state.role == R_PRIMARY && new < now)
+ dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+ if (new != now)
+ dev_info(DEV, "max BIO size = %u\n", new);
+
+ drbd_setup_queue_param(mdev, new);
+}
+
/* serialize deconfig (worker exiting, doing cleanup)
* and reconfig (drbdsetup disk, drbdsetup net)
*
@@ -865,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
- unsigned int max_bio_size;
enum drbd_state_rv rv;
int cp_discovered = 0;
int logical_block_size;
@@ -1117,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;
- max_bio_size = DRBD_MAX_BIO_SIZE;
- if (mdev->state.conn == C_CONNECTED) {
- /* We are Primary, Connected, and now attach a new local
- * backing store. We must not increase the user visible maximum
- * bio size on this device to something the peer may not be
- * able to handle. */
- if (mdev->agreed_pro_version < 94)
- max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
- else if (mdev->agreed_pro_version == 94)
- max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
- /* else: drbd 8.3.9 and later, stay with default */
- }
-
- drbd_setup_queue_param(mdev, max_bio_size);
+ drbd_reconsider_max_bio_size(mdev);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@@ -1152,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
!drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
set_bit(USE_DEGR_WFC_T, &mdev->flags);
- dd = drbd_determin_dev_size(mdev, 0);
+ dd = drbd_determine_dev_size(mdev, 0);
if (dd == dev_size_error) {
retcode = ERR_NOMEM_BITMAP;
goto force_diskless_dec;
@@ -1281,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
+ enum drbd_ret_code retcode;
+ int ret;
drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
- reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
- if (mdev->state.disk == D_DISKLESS)
- wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+ retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+ /* D_FAILED will transition to DISKLESS. */
+ ret = wait_event_interruptible(mdev->misc_wait,
+ mdev->state.disk != D_FAILED);
drbd_resume_io(mdev);
+ if ((int)retcode == (int)SS_IS_DISKLESS)
+ retcode = SS_NOTHING_TO_DO;
+ if (ret)
+ retcode = ERR_INTR;
+ reply->ret_code = retcode;
return 0;
}
@@ -1658,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
- dd = drbd_determin_dev_size(mdev, ddsf);
+ dd = drbd_determine_dev_size(mdev, ddsf);
drbd_md_sync(mdev);
put_ldev(mdev);
if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fd26666c0b08..25d32c5aa50a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
if (!page)
goto fail;
- INIT_HLIST_NODE(&e->colision);
+ INIT_HLIST_NODE(&e->collision);
e->epoch = NULL;
e->mdev = mdev;
e->pages = page;
@@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i
kfree(e->digest);
drbd_pp_free(mdev, e->pages, is_net);
D_ASSERT(atomic_read(&e->pending_bios) == 0);
- D_ASSERT(hlist_unhashed(&e->colision));
+ D_ASSERT(hlist_unhashed(&e->collision));
mempool_free(e, drbd_ee_mempool);
}
@@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev)
}
if (sock && msock) {
- schedule_timeout_interruptible(HZ / 10);
+ schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
ok = drbd_socket_okay(mdev, &sock);
ok = drbd_socket_okay(mdev, &msock) && ok;
if (ok)
@@ -899,11 +899,6 @@ retry:
drbd_thread_start(&mdev->asender);
- if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
- drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
- put_ldev(mdev);
- }
-
if (drbd_send_protocol(mdev) == -1)
return -1;
drbd_send_sync_param(mdev, &mdev->sync_conf);
@@ -1418,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
sector_t sector = e->sector;
int ok;
- D_ASSERT(hlist_unhashed(&e->colision));
+ D_ASSERT(hlist_unhashed(&e->collision));
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
drbd_set_in_sync(mdev, sector, e->size);
@@ -1487,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
return false;
}
- /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+ /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
* special casing it there for the various failure cases.
* still no race with drbd_fail_pending_reads */
ok = recv_dless_read(mdev, req, sector, data_size);
@@ -1558,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
if (mdev->net_conf->two_primaries) {
spin_lock_irq(&mdev->req_lock);
- D_ASSERT(!hlist_unhashed(&e->colision));
- hlist_del_init(&e->colision);
+ D_ASSERT(!hlist_unhashed(&e->collision));
+ hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
} else {
- D_ASSERT(hlist_unhashed(&e->colision));
+ D_ASSERT(hlist_unhashed(&e->collision));
}
drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1579,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
spin_lock_irq(&mdev->req_lock);
- D_ASSERT(!hlist_unhashed(&e->colision));
- hlist_del_init(&e->colision);
+ D_ASSERT(!hlist_unhashed(&e->collision));
+ hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
dec_unacked(mdev);
@@ -1755,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
spin_lock_irq(&mdev->req_lock);
- hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+ hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
#define OVERLAPS overlaps(i->sector, i->size, sector, size)
slot = tl_hash_slot(mdev, sector);
@@ -1765,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
int have_conflict = 0;
prepare_to_wait(&mdev->misc_wait, &wait,
TASK_INTERRUPTIBLE);
- hlist_for_each_entry(i, n, slot, colision) {
+ hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
/* only ALERT on first iteration,
* we may be woken up early... */
@@ -1804,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
}
if (signal_pending(current)) {
- hlist_del_init(&e->colision);
+ hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
@@ -1862,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
dev_err(DEV, "submit failed, triggering re-connect\n");
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
- hlist_del_init(&e->colision);
+ hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
if (e->flags & EE_CALL_AL_COMPLETE_IO)
drbd_al_complete_io(mdev, e->sector);
@@ -2916,12 +2911,6 @@ disconnect:
return false;
}
-static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
-{
- /* sorry, we currently have no working implementation
- * of distributed TCQ */
-}
-
/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_conf *mdev,
const char *s, sector_t a, sector_t b)
@@ -2939,7 +2928,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
{
struct p_sizes *p = &mdev->data.rbuf.sizes;
enum determine_dev_size dd = unchanged;
- unsigned int max_bio_size;
sector_t p_size, p_usize, my_usize;
int ldsc = 0; /* local disk size changed */
enum dds_flags ddsf;
@@ -2994,7 +2982,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(mdev)) {
- dd = drbd_determin_dev_size(mdev, ddsf);
+ dd = drbd_determine_dev_size(mdev, ddsf);
put_ldev(mdev);
if (dd == dev_size_error)
return false;
@@ -3004,23 +2992,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
drbd_set_my_capacity(mdev, p_size);
}
+ mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+ drbd_reconsider_max_bio_size(mdev);
+
if (get_ldev(mdev)) {
if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
ldsc = 1;
}
- if (mdev->agreed_pro_version < 94)
- max_bio_size = be32_to_cpu(p->max_bio_size);
- else if (mdev->agreed_pro_version == 94)
- max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
- else /* drbd 8.3.8 onwards */
- max_bio_size = DRBD_MAX_BIO_SIZE;
-
- if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
- drbd_setup_queue_param(mdev, max_bio_size);
-
- drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
put_ldev(mdev);
}
@@ -4275,7 +4255,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
struct hlist_node *n;
struct drbd_request *req;
- hlist_for_each_entry(req, n, slot, colision) {
+ hlist_for_each_entry(req, n, slot, collision) {
if ((unsigned long)req == (unsigned long)id) {
if (req->sector != sector) {
dev_err(DEV, "_ack_id_to_req: found req %p but it has "
@@ -4554,6 +4534,7 @@ int drbd_asender(struct drbd_thread *thi)
int received = 0;
int expect = sizeof(struct p_header80);
int empty;
+ int ping_timeout_active = 0;
sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
@@ -4566,6 +4547,7 @@ int drbd_asender(struct drbd_thread *thi)
ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
mdev->meta.socket->sk->sk_rcvtimeo =
mdev->net_conf->ping_timeo*HZ/10;
+ ping_timeout_active = 1;
}
/* conditionally cork;
@@ -4620,8 +4602,7 @@ int drbd_asender(struct drbd_thread *thi)
dev_err(DEV, "meta connection shut down by peer.\n");
goto reconnect;
} else if (rv == -EAGAIN) {
- if (mdev->meta.socket->sk->sk_rcvtimeo ==
- mdev->net_conf->ping_timeo*HZ/10) {
+ if (ping_timeout_active) {
dev_err(DEV, "PingAck did not arrive in time.\n");
goto reconnect;
}
@@ -4660,6 +4641,11 @@ int drbd_asender(struct drbd_thread *thi)
if (!cmd->process(mdev, h))
goto reconnect;
+ /* the idle_timeout (ping-int)
+ * has been restored in got_PingAck() */
+ if (cmd == get_asender_cmd(P_PING_ACK))
+ ping_timeout_active = 0;
+
buf = h;
received = 0;
expect = sizeof(struct p_header80);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5c0c8be1bb0a..3424d675b769 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
* they must have been failed on the spot */
#define OVERLAPS overlaps(sector, size, i->sector, i->size)
slot = tl_hash_slot(mdev, sector);
- hlist_for_each_entry(i, n, slot, colision) {
+ hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
"other: %p %llus +%u\n",
@@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
#undef OVERLAPS
#define OVERLAPS overlaps(sector, size, e->sector, e->size)
slot = ee_hash_slot(mdev, req->sector);
- hlist_for_each_entry(e, n, slot, colision) {
+ hlist_for_each_entry(e, n, slot, collision) {
if (OVERLAPS) {
wake_up(&mdev->misc_wait);
break;
@@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
/* remove the request from the conflict detection
* respective block_id verification hash */
- if (!hlist_unhashed(&req->colision))
- hlist_del(&req->colision);
+ if (!hlist_unhashed(&req->collision))
+ hlist_del(&req->collision);
else
D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
@@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req)
struct hlist_node *n;
struct hlist_head *slot;
- D_ASSERT(hlist_unhashed(&req->colision));
+ D_ASSERT(hlist_unhashed(&req->collision));
if (!get_net_conf(mdev))
return 0;
@@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req)
#define OVERLAPS overlaps(i->sector, i->size, sector, size)
slot = tl_hash_slot(mdev, sector);
- hlist_for_each_entry(i, n, slot, colision) {
+ hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "%s[%u] Concurrent local write detected! "
"[DISCARD L] new: %llus +%u; "
@@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req)
#undef OVERLAPS
#define OVERLAPS overlaps(e->sector, e->size, sector, size)
slot = ee_hash_slot(mdev, sector);
- hlist_for_each_entry(e, n, slot, colision) {
+ hlist_for_each_entry(e, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
" [DISCARD L] new: %llus +%u; "
@@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* so we can verify the handle in the answer packet
* corresponding hlist_del is in _req_may_be_done() */
- hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+ hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
set_bit(UNPLUG_REMOTE, &mdev->flags);
@@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* assert something? */
/* from drbd_make_request_common only */
- hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+ hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
/* corresponding hlist_del is in _req_may_be_done() */
/* NOTE
@@ -1033,7 +1033,7 @@ fail_conflicting:
err = 0;
fail_free_complete:
- if (rw == WRITE && local)
+ if (req->rq_state & RQ_IN_ACT_LOG)
drbd_al_complete_io(mdev, sector);
fail_and_free_req:
if (local) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 32e2c3e6a813..68a234a5fdc5 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
struct hlist_node *n;
struct drbd_request *req;
- hlist_for_each_entry(req, n, slot, colision) {
+ hlist_for_each_entry(req, n, slot, collision) {
if ((unsigned long)req == (unsigned long)id) {
D_ASSERT(req->sector == sector);
return req;
@@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
req->epoch = 0;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
- INIT_HLIST_NODE(&req->colision);
+ INIT_HLIST_NODE(&req->collision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
}
@@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f7e6c92f8d03..4d76b06b6b20 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
list_del(&e->w.list); /* has been on active_ee or sync_ee */
list_add_tail(&e->w.list, &mdev->done_ee);
- /* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+ /* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
* neither did we wake possibly waiting conflicting requests.
* done from "drbd_process_done_ee" within the appropriate w.cb
* (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
@@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
crypto_hash_final(&desc, digest);
}
-static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+/* TODO merge common code with w_e_end_ov_req */
+int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
int digest_size;
void *digest;
- int ok;
+ int ok = 1;
D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
- if (unlikely(cancel)) {
- drbd_free_ee(mdev, e);
- return 1;
- }
+ if (unlikely(cancel))
+ goto out;
- if (likely((e->flags & EE_WAS_ERROR) == 0)) {
- digest_size = crypto_hash_digestsize(mdev->csums_tfm);
- digest = kmalloc(digest_size, GFP_NOIO);
- if (digest) {
- drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+ if (likely((e->flags & EE_WAS_ERROR) != 0))
+ goto out;
- inc_rs_pending(mdev);
- ok = drbd_send_drequest_csum(mdev,
- e->sector,
- e->size,
- digest,
- digest_size,
- P_CSUM_RS_REQUEST);
- kfree(digest);
- } else {
- dev_err(DEV, "kmalloc() of digest failed.\n");
- ok = 0;
- }
- } else
- ok = 1;
+ digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
+ drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
+ e = NULL;
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev, sector, size,
+ digest, digest_size,
+ P_CSUM_RS_REQUEST);
+ kfree(digest);
+ } else {
+ dev_err(DEV, "kmalloc() of digest failed.\n");
+ ok = 0;
+ }
- drbd_free_ee(mdev, e);
+out:
+ if (e)
+ drbd_free_ee(mdev, e);
if (unlikely(!ok))
dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
@@ -834,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
const int ratio =
(t == 0) ? 0 :
(t < 100000) ? ((s*100)/t) : (s/(t/100));
- dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+ dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
"transferred %luK total %luK\n",
ratio,
Bit2KB(mdev->rs_same_csum),
@@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}
+/* TODO merge common code with w_e_send_csum */
int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
int digest_size;
void *digest;
int ok = 1;
@@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
else
memset(digest, 0, digest_size);
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
+ e = NULL;
inc_rs_pending(mdev);
- ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
- digest, digest_size, P_OV_REPLY);
+ ok = drbd_send_drequest_csum(mdev, sector, size,
+ digest, digest_size,
+ P_OV_REPLY);
if (!ok)
dec_rs_pending(mdev);
kfree(digest);
out:
- drbd_free_ee(mdev, e);
+ if (e)
+ drbd_free_ee(mdev, e);
dec_unacked(mdev);
-
return ok;
}
@@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
struct digest_info *di;
- int digest_size;
void *digest;
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
+ int digest_size;
int ok, eq = 0;
if (unlikely(cancel)) {
@@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
}
- dec_unacked(mdev);
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
if (!eq)
- drbd_ov_oos_found(mdev, e->sector, e->size);
+ drbd_ov_oos_found(mdev, sector, size);
else
ov_oos_print(mdev);
- ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
+ ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
- drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
--mdev->ov_left;