From df30781699f53e4fd4c494c6f7dd16e3d5c21d30 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 17 May 2018 19:14:43 +0200 Subject: scsi: zfcp: fix missing SCSI trace for result of eh_host_reset_handler For problem determination we need to see whether and why we were successful or not. This allows deduction of scsi_eh escalation. Example trace record formatted with zfcpdbf from s390-tools: Timestamp : ... Area : SCSI Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : schrh_r SCSI host reset handler result Request ID : 0x0000000000000000 none (invalid) SCSI ID : 0xffffffff none (invalid) SCSI LUN : 0xffffffff none (invalid) SCSI LUN high : 0xffffffff none (invalid) SCSI result : 0x00002002 field re-used for midlayer value: SUCCESS or in other cases: 0x2009 == FAST_IO_FAIL SCSI retries : 0xff none (invalid) SCSI allowed : 0xff none (invalid) SCSI scribble : 0xffffffffffffffff none (invalid) SCSI opcode : ffffffff ffffffff ffffffff ffffffff none (invalid) FCP rsp inf cod: 0xff none (invalid) FCP rsp IU : 00000000 00000000 00000000 00000000 none (invalid) 00000000 00000000 v2.6.35 commit a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") introduced the first return with something other than the previously hardcoded single SUCCESS return path. Signed-off-by: Steffen Maier Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") Cc: #2.6.38+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_dbf.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'drivers/s390/scsi/zfcp_dbf.c') diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index a8b831000b2d..1e5ea5e4992b 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -643,6 +643,46 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, spin_unlock_irqrestore(&dbf->scsi_lock, flags); } +/** + * zfcp_dbf_scsi_eh() - Trace event for special cases of scsi_eh callbacks. + * @tag: Identifier for event. + * @adapter: Pointer to zfcp adapter as context for this event. + * @scsi_id: SCSI ID/target to indicate scope of task management function (TMF). + * @ret: Return value of calling function. + * + * This SCSI trace variant does not depend on any of: + * scsi_cmnd, zfcp_fsf_req, scsi_device. + */ +void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter, + unsigned int scsi_id, int ret) +{ + struct zfcp_dbf *dbf = adapter->dbf; + struct zfcp_dbf_scsi *rec = &dbf->scsi_buf; + unsigned long flags; + static int const level = 1; + + if (unlikely(!debug_level_enabled(adapter->dbf->scsi, level))) + return; + + spin_lock_irqsave(&dbf->scsi_lock, flags); + memset(rec, 0, sizeof(*rec)); + + memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN); + rec->id = ZFCP_DBF_SCSI_CMND; + rec->scsi_result = ret; /* re-use field, int is 4 bytes and fits */ + rec->scsi_retries = ~0; + rec->scsi_allowed = ~0; + rec->fcp_rsp_info = ~0; + rec->scsi_id = scsi_id; + rec->scsi_lun = (u32)ZFCP_DBF_INVALID_LUN; + rec->scsi_lun_64_hi = (u32)(ZFCP_DBF_INVALID_LUN >> 32); + rec->host_scribble = ~0; + memset(rec->scsi_opcode, 0xff, ZFCP_DBF_SCSI_OPCODE); + + debug_event(dbf->scsi, level, rec, sizeof(*rec)); + spin_unlock_irqrestore(&dbf->scsi_lock, flags); +} + static debug_info_t *zfcp_dbf_reg(const char *name, int size, int rec_size) { struct debug_info *d; -- cgit v1.2.3 From 8221211863750b1afb1f464a264c05383b077a06 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 17 May 2018 19:14:50 +0200 Subject: scsi: zfcp: decouple SCSI traces for scsi_eh / TMF from scsi_cmnd The SCSI command pointer passed to scsi_eh callbacks is just one arbitrary command of potentially many that are in the eh queue to be processed. The command is only used to indirectly pass the TMF scope in terms of SCSI ID/target and SCSI LUN for LUN reset. Hence, zfcp had filled in SCSI trace record fields which do not really belong to the TMF. This was confusing. Therefore, refactor the TMF tracing to work without SCSI command. Since the FCP channel always requires a valid LUN handle, we use SCSI device as common context for any TMF (even target reset). To make it even clearer, we set all bits to 1 for the fields, which do not belong to the TMF, to indicate that these fields are invalid. The old zfcp_dbf_scsi() became zfcp_dbf_scsi_common() to now handle both SCSI commands and TMFs. The old argument scsi_cmnd is now optional and can be NULL with TMFs. The new argument scsi_device is mandatory to carry context, as well as SCSI ID/target and SCSI LUN in case of TMFs. New example trace record formatted with zfcpdbf from s390-tools: Timestamp : ... Area : SCSI Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : [lt]r_.... Request ID : 0x ID of FSF FCP request with TM flag For cases without FSF request: 0x0 for none (invalid) SCSI ID : 0x SCSI ID/target denoting scope SCSI LUN : 0x SCSI LUN denoting scope SCSI LUN high : 0x SCSI LUN denoting scope SCSI result : 0xffffffff none (invalid) SCSI retries : 0xff none (invalid) SCSI allowed : 0xff none (invalid) SCSI scribble : 0xffffffffffffffff none (invalid) SCSI opcode : ffffffff ffffffff ffffffff ffffffff none (invalid) FCP rsp inf cod: 0x00 FCP_RSP info code of TMF FCP rsp IU : 00000000 00000000 00000100 00000000 ext FCP_RSP IU 00000000 00000008 ext FCP_RSP IU FCP rsp IU len : 32 FCP_RSP IU length Payload time : ... FCP rsp IU all : 00000000 00000000 00000100 00000000 full FCP_RSP IU 00000000 00000008 00000000 00000000 full FCP_RSP IU Signed-off-by: Steffen Maier Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_dbf.c | 48 ++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'drivers/s390/scsi/zfcp_dbf.c') diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 1e5ea5e4992b..bb3373260169 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -578,16 +578,18 @@ void zfcp_dbf_san_in_els(char *tag, struct zfcp_fsf_req *fsf) } /** - * zfcp_dbf_scsi - trace event for scsi commands - * @tag: identifier for event - * @sc: pointer to struct scsi_cmnd - * @fsf: pointer to struct zfcp_fsf_req + * zfcp_dbf_scsi_common() - Common trace event helper for scsi. + * @tag: Identifier for event. + * @level: trace level of event. + * @sdev: Pointer to SCSI device as context for this event. + * @sc: Pointer to SCSI command, or NULL with task management function (TMF). + * @fsf: Pointer to FSF request, or NULL. */ -void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, - struct zfcp_fsf_req *fsf) +void zfcp_dbf_scsi_common(char *tag, int level, struct scsi_device *sdev, + struct scsi_cmnd *sc, struct zfcp_fsf_req *fsf) { struct zfcp_adapter *adapter = - (struct zfcp_adapter *) sc->device->host->hostdata[0]; + (struct zfcp_adapter *) sdev->host->hostdata[0]; struct zfcp_dbf *dbf = adapter->dbf; struct zfcp_dbf_scsi *rec = &dbf->scsi_buf; struct fcp_resp_with_ext *fcp_rsp; @@ -599,16 +601,28 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN); rec->id = ZFCP_DBF_SCSI_CMND; - rec->scsi_result = sc->result; - rec->scsi_retries = sc->retries; - rec->scsi_allowed = sc->allowed; - rec->scsi_id = sc->device->id; - rec->scsi_lun = (u32)sc->device->lun; - rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32); - rec->host_scribble = (unsigned long)sc->host_scribble; - - memcpy(rec->scsi_opcode, sc->cmnd, - min((int)sc->cmd_len, ZFCP_DBF_SCSI_OPCODE)); + if (sc) { + rec->scsi_result = sc->result; + rec->scsi_retries = sc->retries; + rec->scsi_allowed = sc->allowed; + rec->scsi_id = sc->device->id; + rec->scsi_lun = (u32)sc->device->lun; + rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32); + rec->host_scribble = (unsigned long)sc->host_scribble; + + memcpy(rec->scsi_opcode, sc->cmnd, + min_t(int, sc->cmd_len, ZFCP_DBF_SCSI_OPCODE)); + } else { + rec->scsi_result = ~0; + rec->scsi_retries = ~0; + rec->scsi_allowed = ~0; + rec->scsi_id = sdev->id; + rec->scsi_lun = (u32)sdev->lun; + rec->scsi_lun_64_hi = (u32)(sdev->lun >> 32); + rec->host_scribble = ~0; + + memset(rec->scsi_opcode, 0xff, ZFCP_DBF_SCSI_OPCODE); + } if (fsf) { rec->fsf_req_id = fsf->req_id; -- cgit v1.2.3 From 9e156c54ace310ce7fb1cd960e62416947f3d47c Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Thu, 17 May 2018 19:15:05 +0200 Subject: scsi: zfcp: assert that the ERP lock is held when tracing a recovery trigger Otherwise iterating with list_for_each() over the adapter->erp_ready_head and adapter->erp_running_head lists can lead to an infinite loop. See commit "zfcp: fix infinite iteration on erp_ready_head list". The run-time check is only performed for debug kernels which have the kernel lock validator enabled. Following is an example of the warning that is reported, if the ERP lock is not held when calling zfcp_dbf_rec_trig(): WARNING: CPU: 0 PID: 604 at drivers/s390/scsi/zfcp_dbf.c:288 zfcp_dbf_rec_trig+0x172/0x188 Modules linked in: ... CPU: 0 PID: 604 Comm: kworker/u128:3 Not tainted 4.16.0-... #1 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) Workqueue: zfcp_q_0.0.1906 zfcp_scsi_rport_work Krnl PSW : 00000000330fdbf9 00000000367e9728 (zfcp_dbf_rec_trig+0x172/0x188) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 RI:0 EA:3 Krnl GPRS: 00000000c57a5d99 3288200000000000 0000000000000000 000000006cc82740 00000000009d09d6 0000000000000000 00000000000000ff 0000000000000000 0000000000000000 0000000000e1b5fe 000000006de01d38 0000000076130958 000000006cc82548 000000006de01a98 00000000009d09d6 000000006a6d3c80 Krnl Code: 00000000009d0ad2: eb7ff0b80004 lmg %r7,%r15,184(%r15) 00000000009d0ad8: c0f4000d7dd0 brcl 15,b80678 #00000000009d0ade: a7f40001 brc 15,9d0ae0 >00000000009d0ae2: a7f4ff7d brc 15,9d09dc 00000000009d0ae6: e340f0f00004 lg %r4,240(%r15) 00000000009d0aec: eb7ff0b80004 lmg %r7,%r15,184(%r15) 00000000009d0af2: 07f4 bcr 15,%r4 00000000009d0af4: 0707 bcr 0,%r7 Call Trace: ([<00000000009d09d6>] zfcp_dbf_rec_trig+0x66/0x188) [<00000000009dd740>] zfcp_scsi_rport_work+0x98/0x190 [<0000000000169b34>] process_one_work+0x3d4/0x6f8 [<000000000016a08a>] worker_thread+0x232/0x418 [<000000000017219e>] kthread+0x166/0x178 [<0000000000b815ea>] kernel_thread_starter+0x6/0xc [<0000000000b815e4>] kernel_thread_starter+0x0/0xc 2 locks held by kworker/u128:3/604: #0: ((wq_completion)name){+.+.}, at: [<0000000082af1024>] process_one_work+0x1dc/0x6f8 #1: ((work_completion)(&port->rport_work)){+.+.}, at: [<0000000082af1024>] process_one_work+0x1dc/0x6f8 Last Breaking-Event-Address: [<00000000009d0ade>] zfcp_dbf_rec_trig+0x16e/0x188 ---[ end trace b2f4020572e2c124 ]--- Suggested-by: Steffen Maier Signed-off-by: Jens Remus Reviewed-by: Benjamin Block Reviewed-by: Steffen Maier Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_dbf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/s390/scsi/zfcp_dbf.c') diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index bb3373260169..781141bf2c28 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -285,6 +285,8 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, struct list_head *entry; unsigned long flags; + lockdep_assert_held(&adapter->erp_lock); + if (unlikely(!debug_level_enabled(dbf->rec, level))) return; -- cgit v1.2.3