From 9487cfd3430d07366801886bdf185799a2b6f066 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 7 Feb 2018 17:39:14 +0100 Subject: s390/dasd: fix handling of internal requests Internal DASD device driver I/O such as query host access count or path verification is started using the _sleep_on() function. To mark a request as started or ended the callback_data is set to either DASD_SLEEPON_START_TAG or DASD_SLEEPON_END_TAG. In cases where the request has to be stopped unconditionally the status is set to DASD_SLEEPON_END_TAG as well which leads to immediate clearing of the request. But the request might still be on a device request queue for normal operation which might lead to a panic because of a BUG() statement in __dasd_device_process_final_queue() or a list corruption of the device request queue. Fix by removing the setting of DASD_SLEEPON_END_TAG in the dasd_cancel_req() and dasd_generic_requeue_all_requests() functions and ensure that the request is not deleted in the requeue function. Trigger the device tasklet in the requeue function and let the normal processing cleanup the request. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a7c15f0085e2..ecef8e73d40b 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2581,8 +2581,6 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr) case DASD_CQR_QUEUED: /* request was not started - just set to cleared */ cqr->status = DASD_CQR_CLEARED; - if (cqr->callback_data == DASD_SLEEPON_START_TAG) - cqr->callback_data = DASD_SLEEPON_END_TAG; break; case DASD_CQR_IN_IO: /* request in IO - terminate IO and release again */ @@ -3902,9 +3900,12 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) wait_event(dasd_flush_wq, (cqr->status != DASD_CQR_CLEAR_PENDING)); - /* mark sleepon requests as ended */ - if (cqr->callback_data == DASD_SLEEPON_START_TAG) - cqr->callback_data = DASD_SLEEPON_END_TAG; + /* + * requeue requests to blocklayer will only work + * for block device requests + */ + if (_dasd_requeue_request(cqr)) + continue; /* remove requests from device and block queue */ list_del_init(&cqr->devlist); @@ -3917,13 +3918,6 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) cqr = refers; } - /* - * requeue requests to blocklayer will only work - * for block device requests - */ - if (_dasd_requeue_request(cqr)) - continue; - if (cqr->block) list_del_init(&cqr->blocklist); cqr->block->base->discipline->free_cp( @@ -3940,8 +3934,7 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) list_splice_tail(&requeue_queue, &device->ccw_queue); spin_unlock_irq(get_ccwdev_lock(device->cdev)); } - /* wake up generic waitqueue for eventually ended sleepon requests */ - wake_up(&generic_waitq); + dasd_schedule_device_bh(device); return rc; } -- cgit v1.2.3 From f97a6b6c47d2f329a24f92cc0ca3c6df5727ba73 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 6 Feb 2018 14:59:43 +0100 Subject: s390/cio: fix ccw_device_start_timeout API There are cases a device driver can't start IO because the device is currently in use by cio. In this case the device driver is notified when the device is usable again. Using ccw_device_start_timeout we would set the timeout (and change an existing timeout) before we test for internal usage. Worst case this could lead to an unexpected timer deletion. Fix this by setting the timeout after we test for internal usage. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_ops.c | 72 +++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 40 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 1caf6a398760..75ce12a24dc2 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -159,7 +159,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) } /** - * ccw_device_start_key() - start a s390 channel program with key + * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key * @cdev: target ccw device * @cpa: logical start address of channel program * @intparm: user specific interruption parameter; will be presented back to @@ -170,10 +170,15 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) * @key: storage key to be used for the I/O * @flags: additional flags; defines the action to be performed for I/O * processing. + * @expires: timeout value in jiffies * * Start a S/390 channel program. When the interrupt arrives, the * IRQ handler is called, either immediately, delayed (dev-end missing, * or sense required) or never (no IRQ handler registered). + * This function notifies the device driver if the channel program has not + * completed during the time specified by @expires. If a timeout occurs, the + * channel program is terminated via xsch, hsch or csch, and the device's + * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). * Returns: * %0, if the operation was successful; * -%EBUSY, if the device is busy, or status pending; @@ -182,9 +187,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) * Context: * Interrupts disabled, ccw device lock held */ -int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags) +int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags, int expires) { struct subchannel *sch; int ret; @@ -224,6 +229,8 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, switch (ret) { case 0: cdev->private->intparm = intparm; + if (expires) + ccw_device_set_timeout(cdev, expires); break; case -EACCES: case -ENODEV: @@ -234,7 +241,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, } /** - * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key + * ccw_device_start_key() - start a s390 channel program with key * @cdev: target ccw device * @cpa: logical start address of channel program * @intparm: user specific interruption parameter; will be presented back to @@ -245,15 +252,10 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, * @key: storage key to be used for the I/O * @flags: additional flags; defines the action to be performed for I/O * processing. - * @expires: timeout value in jiffies * * Start a S/390 channel program. When the interrupt arrives, the * IRQ handler is called, either immediately, delayed (dev-end missing, * or sense required) or never (no IRQ handler registered). - * This function notifies the device driver if the channel program has not - * completed during the time specified by @expires. If a timeout occurs, the - * channel program is terminated via xsch, hsch or csch, and the device's - * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). * Returns: * %0, if the operation was successful; * -%EBUSY, if the device is busy, or status pending; @@ -262,19 +264,12 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, * Context: * Interrupts disabled, ccw device lock held */ -int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags, int expires) +int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags) { - int ret; - - if (!cdev) - return -ENODEV; - ccw_device_set_timeout(cdev, expires); - ret = ccw_device_start_key(cdev, cpa, intparm, lpm, key, flags); - if (ret != 0) - ccw_device_set_timeout(cdev, 0); - return ret; + return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, key, + flags, 0); } /** @@ -489,18 +484,20 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id) EXPORT_SYMBOL(ccw_device_get_id); /** - * ccw_device_tm_start_key() - perform start function + * ccw_device_tm_start_timeout_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler * @lpm: mask of paths to use * @key: storage key to use for storage access + * @expires: time span in jiffies after which to abort request * * Start the tcw on the given ccw device. Return zero on success, non-zero * otherwise. */ -int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, - unsigned long intparm, u8 lpm, u8 key) +int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key, + int expires) { struct subchannel *sch; int rc; @@ -527,37 +524,32 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, return -EACCES; } rc = cio_tm_start_key(sch, tcw, lpm, key); - if (rc == 0) + if (rc == 0) { cdev->private->intparm = intparm; + if (expires) + ccw_device_set_timeout(cdev, expires); + } return rc; } -EXPORT_SYMBOL(ccw_device_tm_start_key); +EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); /** - * ccw_device_tm_start_timeout_key() - perform start function + * ccw_device_tm_start_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler * @lpm: mask of paths to use * @key: storage key to use for storage access - * @expires: time span in jiffies after which to abort request * * Start the tcw on the given ccw device. Return zero on success, non-zero * otherwise. */ -int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, - unsigned long intparm, u8 lpm, u8 key, - int expires) +int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key) { - int ret; - - ccw_device_set_timeout(cdev, expires); - ret = ccw_device_tm_start_key(cdev, tcw, intparm, lpm, key); - if (ret != 0) - ccw_device_set_timeout(cdev, 0); - return ret; + return ccw_device_tm_start_timeout_key(cdev, tcw, intparm, lpm, key, 0); } -EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); +EXPORT_SYMBOL(ccw_device_tm_start_key); /** * ccw_device_tm_start() - perform start function -- cgit v1.2.3 From 770b55c995d171f026a9efb85e71e3b1ea47b93d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 7 Feb 2018 13:18:19 +0100 Subject: s390/cio: fix return code after missing interrupt When a timeout occurs for users of ccw_device_start_timeout we will stop the IO and call the drivers int handler with the irb pointer set to ERR_PTR(-ETIMEDOUT). Sometimes however we'd set the irb pointer to ERR_PTR(-EIO) which is not intended. Just set the correct value in all codepaths. Reported-by: Julian Wiedmann Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 6 ++++-- drivers/s390/cio/io_sch.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 1319122e9d12..384f085698a7 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -795,6 +795,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_set_timeout(cdev, 0); cdev->private->iretry = 255; + cdev->private->async_kill_io_rc = -ETIMEDOUT; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); @@ -871,7 +872,7 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) /* OK, i/o is dead now. Call interrupt handler. */ if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(cdev->private->async_kill_io_rc)); } static void @@ -888,7 +889,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_online_verify(cdev, 0); if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(cdev->private->async_kill_io_rc)); } void ccw_device_kill_io(struct ccw_device *cdev) @@ -896,6 +897,7 @@ void ccw_device_kill_io(struct ccw_device *cdev) int ret; cdev->private->iretry = 255; + cdev->private->async_kill_io_rc = -EIO; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index af571d8d6925..90e4e3a7841b 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -157,6 +157,7 @@ struct ccw_device_private { unsigned long intparm; /* user interruption parameter */ struct qdio_irq *qdio_data; struct irb irb; /* device status */ + int async_kill_io_rc; struct senseid senseid; /* SenseID info */ struct pgid pgid[8]; /* path group IDs per chpid*/ struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */ -- cgit v1.2.3 From 410d5e13e7638bc146321671e223d56495fbf3c7 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 12 Feb 2018 12:01:03 +0100 Subject: s390/cio: clear timer when terminating driver I/O When we terminate driver I/O (because we need to stop using a certain channel path) we also need to ensure that a timer (which may have been set up using ccw_device_start_timeout) is cleared. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 384f085698a7..9169af7dbb43 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -896,6 +896,7 @@ void ccw_device_kill_io(struct ccw_device *cdev) { int ret; + ccw_device_set_timeout(cdev, 0); cdev->private->iretry = 255; cdev->private->async_kill_io_rc = -EIO; ret = ccw_device_cancel_halt_clear(cdev); -- cgit v1.2.3 From 6be687395b3124f002a653c1a50b3260222b3cd7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:12 +0100 Subject: s390/qeth: free netdevice when removing a card On removal, a qeth card's netdevice is currently not properly freed because the call chain looks as follows: qeth_core_remove_device(card) lx_remove_device(card) unregister_netdev(card->dev) card->dev = NULL !!! qeth_core_free_card(card) if (card->dev) !!! free_netdev(card->dev) Fix it by free'ing the netdev straight after unregistering. This also fixes the sysfs-driven layer switch case (qeth_dev_layer2_store()), where the need to free the current netdevice was not considered at all. Note that free_netdev() takes care of the netif_napi_del() for us too. Fixes: 4a71df50047f ("qeth: new qeth device driver") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 -- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c8b308cfabf1..4a820afececd 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5087,8 +5087,6 @@ static void qeth_core_free_card(struct qeth_card *card) QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *)); qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); - if (card->dev) - free_netdev(card->dev); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); kfree(card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7f236440483f..5ef4c978ad19 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -915,8 +915,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } return; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 962a04b68dd2..b6b12220da71 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2865,8 +2865,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } -- cgit v1.2.3 From 1063e432bb45be209427ed3f1ca3908e4aa3c7d7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:13 +0100 Subject: s390/qeth: when thread completes, wake up all waiters qeth_wait_for_threads() is potentially called by multiple users, make sure to notify all of them after qeth_clear_thread_running_bit() adjusted the thread_running_mask. With no timeout, callers would otherwise stall. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/s390') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4a820afececd..42ee8806fa53 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -960,7 +960,7 @@ void qeth_clear_thread_running_bit(struct qeth_card *card, unsigned long thread) spin_lock_irqsave(&card->thread_mask_lock, flags); card->thread_running_mask &= ~thread; spin_unlock_irqrestore(&card->thread_mask_lock, flags); - wake_up(&card->wait_q); + wake_up_all(&card->wait_q); } EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit); -- cgit v1.2.3 From 17bf8c9b3d499d5168537c98b61eb7a1fcbca6c2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:14 +0100 Subject: s390/qeth: lock read device while queueing next buffer For calling ccw_device_start(), issue_next_read() needs to hold the device's ccwlock. This is satisfied for the IRQ handler path (where qeth_irq() gets called under the ccwlock), but we need explicit locking for the initial call by the MPC initialization. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 42ee8806fa53..2a9afaf8f264 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -527,8 +527,7 @@ static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue) queue == card->qdio.no_in_queues - 1; } - -static int qeth_issue_next_read(struct qeth_card *card) +static int __qeth_issue_next_read(struct qeth_card *card) { int rc; struct qeth_cmd_buffer *iob; @@ -559,6 +558,17 @@ static int qeth_issue_next_read(struct qeth_card *card) return rc; } +static int qeth_issue_next_read(struct qeth_card *card) +{ + int ret; + + spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card))); + ret = __qeth_issue_next_read(card); + spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card))); + + return ret; +} + static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) { struct qeth_reply *reply; @@ -1182,7 +1192,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, return; if (channel == &card->read && channel->state == CH_STATE_UP) - qeth_issue_next_read(card); + __qeth_issue_next_read(card); iob = channel->iob; index = channel->buf_no; -- cgit v1.2.3 From a6c3d93963e4b333c764fde69802c3ea9eaa9d5c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:15 +0100 Subject: s390/qeth: on channel error, reject further cmd requests When the IRQ handler determines that one of the cmd IO channels has failed and schedules recovery, block any further cmd requests from being submitted. The request would inevitably stall, and prevent the recovery from making progress until the request times out. This sort of error was observed after Live Guest Relocation, where the pending IO on the READ channel intentionally gets terminated to kick-start recovery. Simultaneously the guest executed SIOCETHTOOL, triggering qeth to issue a QUERY CARD INFO command. The command then stalled in the inoperabel WRITE channel. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/s390') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2a9afaf8f264..3653bea38470 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1174,6 +1174,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } rc = qeth_get_problem(cdev, irb); if (rc) { + card->read_or_write_problem = 1; qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; -- cgit v1.2.3