From c787f1baa5031c22cbe20af17b2ee36ad32957ea Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:34 -0500 Subject: block: Add PR callouts for read keys and reservation Add callouts for reading keys and reservations. This allows LIO to support the READ_KEYS and READ_RESERVATION commands so it can export devices to VMs for software like windows clustering. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-2-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/pr.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/pr.h b/include/linux/pr.h index 94ceec713afe..3003daec28a5 100644 --- a/include/linux/pr.h +++ b/include/linux/pr.h @@ -4,6 +4,18 @@ #include +struct pr_keys { + u32 generation; + u32 num_keys; + u64 keys[]; +}; + +struct pr_held_reservation { + u64 key; + u32 generation; + enum pr_type type; +}; + struct pr_ops { int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags); @@ -14,6 +26,19 @@ struct pr_ops { int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort); int (*pr_clear)(struct block_device *bdev, u64 key); + /* + * pr_read_keys - Read the registered keys and return them in the + * pr_keys->keys array. The keys array will have been allocated at the + * end of the pr_keys struct, and pr_keys->num_keys must be set to the + * number of keys the array can hold. If there are more than can fit + * in the array, success will still be returned and pr_keys->num_keys + * will reflect the total number of keys the device contains, so the + * caller can retry with a larger array. 
+ */ + int (*pr_read_keys)(struct block_device *bdev, + struct pr_keys *keys_info); + int (*pr_read_reservation)(struct block_device *bdev, + struct pr_held_reservation *rsv); }; #endif /* LINUX_PR_H */ -- cgit v1.2.3 From 7ba150834b840f6f5cdd07ca69a4ccf39df59a66 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:35 -0500 Subject: block: Rename BLK_STS_NEXUS to BLK_STS_RESV_CONFLICT BLK_STS_NEXUS is used for NVMe/SCSI reservation conflicts and DASD's locking feature which works similar to NVMe/SCSI reservations where a host can get a lock on a device and when the lock is taken it will get failures. This patch renames BLK_STS_NEXUS so it better reflects this type of use. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-3-michael.christie@oracle.com Acked-by: Stefan Haberland Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/blk_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 99be590f952f..2b2452086a2f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -96,7 +96,7 @@ typedef u16 blk_short_t; #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) -#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) @@ -184,7 +184,7 @@ static inline bool blk_path_error(blk_status_t error) case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: - case BLK_STS_NEXUS: + case BLK_STS_RESV_CONFLICT: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; -- cgit v1.2.3 From 
0730b1632b7e803aad81ff19a4fda964a9d97053 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:37 -0500 Subject: scsi: Move sd_pr_type to scsi_common LIO is going to want to do the same block to/from SCSI pr types as sd.c so this moves the sd_pr_type helper to scsi_common and renames it. The next patch will then also add a helper to go from the SCSI value to the block one for use with PERSISTENT_RESERVE_IN commands. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-5-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/scsi/scsi_common.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 5b567b43e1b1..e25291bbbe9b 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -7,8 +7,20 @@ #define _SCSI_COMMON_H_ #include +#include #include +enum scsi_pr_type { + SCSI_PR_WRITE_EXCLUSIVE = 0x01, + SCSI_PR_EXCLUSIVE_ACCESS = 0x03, + SCSI_PR_WRITE_EXCLUSIVE_REG_ONLY = 0x05, + SCSI_PR_EXCLUSIVE_ACCESS_REG_ONLY = 0x06, + SCSI_PR_WRITE_EXCLUSIVE_ALL_REGS = 0x07, + SCSI_PR_EXCLUSIVE_ACCESS_ALL_REGS = 0x08, +}; + +enum scsi_pr_type block_pr_type_to_scsi(enum pr_type type); + static inline unsigned scsi_varlen_cdb_length(const void *hdr) { -- cgit v1.2.3 From 0af7b5e2362d3b67334f20e49138d89141dc24d3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:38 -0500 Subject: scsi: Add support for block PR read keys/reservation This adds support in sd.c for the block PR read keys and read reservation callouts, so upper layers like LIO can get the PR info that's been setup using the existing pr callouts and return it to initiators. 
Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-6-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/scsi/scsi_common.h | 1 + include/scsi/scsi_proto.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index e25291bbbe9b..fb58715fac86 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -20,6 +20,7 @@ enum scsi_pr_type { }; enum scsi_pr_type block_pr_type_to_scsi(enum pr_type type); +enum pr_type scsi_pr_type_to_block(enum scsi_pr_type type); static inline unsigned scsi_varlen_cdb_length(const void *hdr) diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index fbe5bdfe4d6e..07d65c1f59db 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -151,6 +151,11 @@ #define ZO_FINISH_ZONE 0x02 #define ZO_OPEN_ZONE 0x03 #define ZO_RESET_WRITE_POINTER 0x04 +/* values for PR in service action */ +#define READ_KEYS 0x00 +#define READ_RESERVATION 0x01 +#define REPORT_CAPABILITES 0x02 +#define READ_FULL_STATUS 0x03 /* values for variable length command */ #define XDREAD_32 0x03 #define XDWRITE_32 0x04 -- cgit v1.2.3 From f2bf2e7e2d526116aab942aaf1b71a949a570ba6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:40 -0500 Subject: nvme: Fix reservation status related structs This fixes the following issues with the reservation status structs: 1. resv10 is bytes 23:10 so it should be 14 bytes. 2. regctl_ds only supports 64 bit host IDs. These are not currently used, but will be in this patchset which adds support for the reservation report command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-8-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/linux/nvme.h | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fad4aa245fb..57b5b2b8d95b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,20 +759,42 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +struct nvme_registered_ctrl { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 hostid; + __le64 rkey; +}; + struct nvme_reservation_status { __le32 gen; __u8 rtype; __u8 regctl[2]; __u8 resv5[2]; __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; + __u8 resv10[14]; + struct nvme_registered_ctrl regctl_ds[]; +}; + +struct nvme_registered_ctrl_ext { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 rkey; + __u8 hostid[16]; + __u8 rsvd32[32]; +}; + +struct nvme_reservation_status_ext { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[14]; + __u8 rsvd24[40]; + struct nvme_registered_ctrl_ext regctl_eds[]; }; enum nvme_async_event_type { -- cgit v1.2.3 From 5fd96a4e15de8442915a912233d800c56f49001d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:44 -0500 Subject: nvme: Add pr_ops read_keys support This patch adds support for the pr_ops read_keys callout by calling the NVMe Reservation Report helper, then parsing that info to get the controller's registered keys. Because the callout is only used in the kernel where the callers, like LIO, do not know about controller/host IDs, the callout just returns the registered keys which is required by the SCSI PR in READ KEYS command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-12-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 57b5b2b8d95b..a617e250d629 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,10 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_eds { + NVME_EXTENDED_DATA_STRUCT = 0x1, +}; + struct nvme_registered_ctrl { __le16 cntlid; __u8 rcsts; -- cgit v1.2.3 From be1a7cd2d0ed028ffdd60c65e3734e2a1d8b17df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:45 -0500 Subject: nvme: Add a nvme_pr_type enum The next patch adds support to report the reservation type, so we need to be able to convert from the NVMe PR value we get from the device to the linux block layer PR value that will be returned to callers. To prepare for that, this patch adds a nvme_pr_type enum and renames the nvme_pr_type function. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-13-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/linux/nvme.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a617e250d629..4013abb86642 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,15 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_pr_type { + NVME_PR_WRITE_EXCLUSIVE = 1, + NVME_PR_EXCLUSIVE_ACCESS = 2, + NVME_PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + NVME_PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + enum nvme_eds { NVME_EXTENDED_DATA_STRUCT = 0x1, }; -- cgit v1.2.3 From 0217da08c1b904be49ac141442bbc1671d3630e7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:47 -0500 Subject: scsi: target: Rename sbc_ops to exec_cmd_ops The next patches allow us to call the block layer's pr_ops from the backends. This will require allowing the backends to hook into the cmd processing for SPC commands, so this renames sbc_ops to a more generic exec_cmd_ops. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-15-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/target/target_core_backend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index a3c193df25b3..c5df78959532 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -62,7 +62,7 @@ struct target_backend_ops { struct configfs_attribute **tb_dev_action_attrs; }; -struct sbc_ops { +struct exec_cmd_ops { sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *, u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); @@ -86,7 +86,7 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd); sense_reason_t spc_emulate_inquiry_std(struct se_cmd *, unsigned char *); sense_reason_t spc_emulate_evpd_83(struct se_cmd *, unsigned char *); -sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops); +sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct exec_cmd_ops *ops); u32 sbc_get_device_rev(struct se_device *dev); u32 sbc_get_device_type(struct se_device *dev); sector_t sbc_get_write_same_sectors(struct se_cmd *cmd); -- cgit v1.2.3 From 53062ace0b6e47f17cae2db453858c8a369a2fe4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:48 -0500 Subject: scsi: target: Allow backends to hook into PR handling For the cases where you want to export a device to a VM via a single I_T nexus and want to passthrough the PR handling to the physical/real device you have to use pscsi or tcmu. Both are good for specific uses however for the case where you want good performance, and are not using SCSI devices directly (using DM/MD RAID or multipath devices) then we are out of luck. The following patches allow iblock to minimally hook into the LIO PR code and then pass the PR handling to the physical device. Note that like with the tcmu and pscsi cases it's only supported when you export the device via one I_T nexus. 
This patch adds the initial LIO callouts. The next patch will modify iblock. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-16-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/target/target_core_backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index c5df78959532..739df993aa5e 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -69,6 +69,10 @@ struct exec_cmd_ops { sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_unmap)(struct se_cmd *cmd, sector_t lba, sector_t nolb); + sense_reason_t (*execute_pr_out)(struct se_cmd *cmd, u8 sa, u64 key, + u64 sa_key, u8 type, bool aptpl); + sense_reason_t (*execute_pr_in)(struct se_cmd *cmd, u8 sa, + unsigned char *param_data); }; int transport_backend_register(const struct target_backend_ops *); -- cgit v1.2.3 From d9b3275bddd58f1e61171483c3625b5bd0841b71 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:49 -0500 Subject: scsi: target: Pass struct target_opcode_descriptor to enabled The iblock pr_ops support does not support commands that require port or I_T Nexus info. This adds a struct target_opcode_descriptor as an argument to the enabled callout so we can still have the common tcm_is_pr_enabled and tcm_is_scsi2_reservations_enabled functions and also determine if the command is supported based on the command and service action and device settings. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-17-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/target/target_core_base.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 12c9ba16217e..04646b3dbf75 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -878,7 +878,8 @@ struct target_opcode_descriptor { u8 specific_timeout; u16 nominal_timeout; u16 recommended_timeout; - bool (*enabled)(struct se_cmd *cmd); + bool (*enabled)(struct target_opcode_descriptor *descr, + struct se_cmd *cmd); void (*update_usage_bits)(u8 *usage_bits, struct se_device *dev); u8 usage_bits[]; -- cgit v1.2.3 From eca2040972b411ec27483bf75dc8b84e730e88ff Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:34 +0200 Subject: scsi: block: ioprio: Clean up interface definition The I/O priority user interface defines the 16-bits ioprio values as the combination of the upper 3-bits for an I/O priority class and the lower 13-bits as priority data. However, the kernel only uses the lower 3-bits of the priority data to define priority levels for the RT and BE priority classes. The data part of an ioprio value is completely ignored for the IDLE and NONE classes. This is enforced by checks done in ioprio_check_cap(), which is called for all paths that allow defining an I/O priority for I/Os: the per-context ioprio_set() system call, aio interface and io_uring interface. Clarify this fact in the uapi ioprio.h header file and introduce the IOPRIO_LEVEL_MASK and IOPRIO_PRIO_LEVEL() macros for users to define and get priority levels in an ioprio value. The coarser macro IOPRIO_PRIO_DATA() is retained for backward compatibility with old applications already using it. There is no functional change introduced with this. 
In-kernel users of the IOPRIO_PRIO_DATA() macro which are explicitly handling I/O priority data as a priority level are modified to use the new IOPRIO_PRIO_LEVEL() macro without any functional change. Since f2fs is the only user of this macro not explicitly using that value as a priority level, it is left unchanged. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-2-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index f70f2596a6bf..4444b4e4fdad 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -17,7 +17,7 @@ ((data) & IOPRIO_PRIO_MASK)) /* - * These are the io priority groups as implemented by the BFQ and mq-deadline + * These are the io priority classes as implemented by the BFQ and mq-deadline * schedulers. RT is the realtime class, it always gets premium service. For * ATA disks supporting NCQ IO priority, RT class IOs will be processed using * high priority NCQ commands. BE is the best-effort scheduling class, the @@ -32,11 +32,20 @@ enum { }; /* - * The RT and BE priority classes both support up to 8 priority levels. + * The RT and BE priority classes both support up to 8 priority levels that + * can be specified using the lower 3-bits of the priority data. */ -#define IOPRIO_NR_LEVELS 8 -#define IOPRIO_BE_NR IOPRIO_NR_LEVELS +#define IOPRIO_LEVEL_NR_BITS 3 +#define IOPRIO_NR_LEVELS (1 << IOPRIO_LEVEL_NR_BITS) +#define IOPRIO_LEVEL_MASK (IOPRIO_NR_LEVELS - 1) +#define IOPRIO_PRIO_LEVEL(ioprio) ((ioprio) & IOPRIO_LEVEL_MASK) +#define IOPRIO_BE_NR IOPRIO_NR_LEVELS + +/* + * Possible values for the "which" argument of the ioprio_get() and + * ioprio_set() system calls (see "man ioprio_set"). 
+ */ enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, @@ -44,7 +53,7 @@ enum { }; /* - * Fallback BE priority level. + * Fallback BE class priority level. */ #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM -- cgit v1.2.3 From 6c913257226a25879bfd6226e0ee265e98904ce6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:35 +0200 Subject: scsi: block: Introduce ioprio hints I/O priorities currently only use 6-bits of the 16-bits ioprio value: the 3-upper bits are used to define up to 8 priority classes (4 of which are valid) and the 3 lower bits of the value are used to define a priority level for the real-time and best-effort class. The remaining 10-bits between the I/O priority class and level are unused, and in fact, cannot be used by the user as doing so would either result in the value being completely ignored, or in an error returned by ioprio_check_cap(). Use these 10-bits of an ioprio value to allow a user to specify I/O hints. An I/O hint is defined as a 10-bit value, allowing up to 1023 different hints to be specified, with the value 0 being reserved as the "no hint" case. An I/O hint can apply to any I/O that specifies a valid priority class other than NONE, regardless of the I/O priority level specified. To do so, the macros IOPRIO_PRIO_HINT() and IOPRIO_PRIO_VALUE_HINT() are introduced in include/uapi/linux/ioprio.h to respectively allow a user to get and set a hint in an ioprio value. To support the ATA and SCSI command duration limits feature, 7 hints are defined: IOPRIO_HINT_DEV_DURATION_LIMIT_1 to IOPRIO_HINT_DEV_DURATION_LIMIT_7, allowing a user to specify which command duration limit descriptor should be applied to the commands serving an I/O. Specifying these hints has for now no effect whatsoever if the target block devices do not support the command duration limits feature. 
However, in the future, block I/O schedulers can be modified to optimize I/O issuing order based on these hints, even for devices that do not support the command duration limits feature. Given that the 7 duration limits hints defined have no effect on any block layer component, the actual definition of the duration limits implied by these hints remains at the device level. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-3-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 4444b4e4fdad..4c4806e8230b 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -58,4 +58,53 @@ enum { #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM +/* + * The 10 bits between the priority class and the priority level are used to + * optionally define I/O hints for any combination of I/O priority class and + * level. Depending on the kernel configuration, I/O scheduler being used and + * the target I/O device being used, hints can influence how I/Os are processed + * without affecting the I/O scheduling ordering defined by the I/O priority + * class and level. + */ +#define IOPRIO_HINT_SHIFT IOPRIO_LEVEL_NR_BITS +#define IOPRIO_HINT_NR_BITS 10 +#define IOPRIO_NR_HINTS (1 << IOPRIO_HINT_NR_BITS) +#define IOPRIO_HINT_MASK (IOPRIO_NR_HINTS - 1) +#define IOPRIO_PRIO_HINT(ioprio) \ + (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) + +/* + * Alternate macro for IOPRIO_PRIO_VALUE() to define an I/O priority with + * a class, level and hint. 
+ */ +#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ + ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ + (((hint) & IOPRIO_HINT_MASK) << IOPRIO_HINT_SHIFT) | \ + ((level) & IOPRIO_LEVEL_MASK)) + +/* + * I/O hints. + */ +enum { + /* No hint */ + IOPRIO_HINT_NONE = 0, + + /* + * Device command duration limits: indicate to the device a desired + * duration limit for the commands that will be used to process an I/O. + * These will currently only be effective for SCSI and ATA devices that + * support the command duration limits feature. If this feature is + * enabled, then the commands issued to the device to process an I/O with + * one of these hints set will have the duration limit index (dld field) + * set to the value of the hint. + */ + IOPRIO_HINT_DEV_DURATION_LIMIT_1 = 1, + IOPRIO_HINT_DEV_DURATION_LIMIT_2 = 2, + IOPRIO_HINT_DEV_DURATION_LIMIT_3 = 3, + IOPRIO_HINT_DEV_DURATION_LIMIT_4 = 4, + IOPRIO_HINT_DEV_DURATION_LIMIT_5 = 5, + IOPRIO_HINT_DEV_DURATION_LIMIT_6 = 6, + IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, +}; + #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From dffc480d2df1772d6092f46f2b4c5e0de941bd47 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:36 +0200 Subject: scsi: block: Introduce BLK_STS_DURATION_LIMIT Introduce the new block I/O status BLK_STS_DURATION_LIMIT for LLDDs to report command that failed due to a command duration limit being exceeded. This new status is mapped to the ETIME error code to allow users to differentiate "soft" duration limit failures from other more serious hardware related errors. If we compare BLK_STS_DURATION_LIMIT with BLK_STS_TIMEOUT: -BLK_STS_DURATION_LIMIT means that the drive gave a reply indicating that the command duration limit was exceeded before the command could be completed. This I/O status is mapped to ETIME for user space. -BLK_STS_TIMEOUT means that the drive never gave a reply at all. This I/O status is mapped to ETIMEDOUT for user space. 
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-4-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/linux/blk_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 740afe80f297..dfdcd218aaac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -171,6 +171,12 @@ typedef u16 blk_short_t; */ #define BLK_STS_OFFLINE ((__force blk_status_t)17) +/* + * BLK_STS_DURATION_LIMIT is returned from the driver when the target device + * aborted the command because it exceeded one of its Command Duration Limits. + */ +#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- cgit v1.2.3 From 3d848ca1ebc8d8864f25bd461914c93eff82a2d2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:37 +0200 Subject: scsi: core: Allow libata to complete successful commands via EH In SCSI, we get the sense data as part of the completion, for ATA however, we need to fetch the sense data as an extra step. For an aborted ATA command the sense data is fetched via libata's ->eh_strategy_handler(). For Command Duration Limits policy 0xD: The device shall complete the command without error with the additional sense code set to DATA CURRENTLY UNAVAILABLE. In order to handle this policy in libata, we intend to send a successful command via SCSI EH, and let libata's ->eh_strategy_handler() fetch the sense data for the good command. This is similar to how we handle an aborted ATA command, just that we need to read the Successful NCQ Commands log instead of the NCQ Command Error log. 
When we get a SATA completion with successful commands, ATA_SENSE will be set, indicating that some commands in the completion have sense data. The sense_valid bitmask in the Sense Data for Successful NCQ Commands log will indicate exactly which commands had sense data, which might be a subset of all the commands that were completed in the same completion. (Yet all will have ATA_SENSE set, since the status is per completion.) The successful commands that have e.g. a "DATA CURRENTLY UNAVAILABLE" sense data will have a SCSI ML byte set, so scsi_eh_flush_done_q() will not set the scmd->result to DID_TIME_OUT for these commands. However, the successful commands that did not have sense data must not get their result marked as DID_TIME_OUT by SCSI EH. Add a new flag SCMD_FORCE_EH_SUCCESS, which tells SCSI EH to not mark a command as DID_TIME_OUT, even if it has scmd->result == SAM_STAT_GOOD. This will be used by libata in a subsequent commit. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-5-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index c2cb5f69635c..526def14e7fb 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -52,6 +52,11 @@ struct scsi_pointer { #define SCMD_TAGGED (1 << 0) #define SCMD_INITIALIZED (1 << 1) #define SCMD_LAST (1 << 2) +/* + * libata uses SCSI EH to fetch sense data for successful commands. + * SCSI EH should not overwrite scmd->result when SCMD_FORCE_EH_SUCCESS is set. 
+ */ +#define SCMD_FORCE_EH_SUCCESS (1 << 3) #define SCMD_FAIL_IF_RECOVERING (1 << 4) /* flags preserved across unprep / reprep */ #define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING) -- cgit v1.2.3 From a6cdc35fab0d813d54744abe2af07d6c49c07d6e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:39 +0200 Subject: scsi: core: Support retrieving sub-pages of mode pages Allow scsi_mode_sense() to retrieve sub-pages of mode pages by adding the subpage argument. Change all the current caller sites to specify the subpage 0. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-7-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index f10a008e5bfa..c146cc807d44 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -421,10 +421,10 @@ extern int scsi_track_queue_full(struct scsi_device *, int); extern int scsi_set_medium_removal(struct scsi_device *, char); -extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, - unsigned char *buffer, int len, int timeout, - int retries, struct scsi_mode_data *data, - struct scsi_sense_hdr *); +int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, + int subpage, unsigned char *buffer, int len, int timeout, + int retries, struct scsi_mode_data *data, + struct scsi_sense_hdr *); extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, -- cgit v1.2.3 From 152e52fb6ff180e97d64585e87fea44c49b8bda8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:40 +0200 Subject: scsi: core: Support Service 
Action in scsi_report_opcode() The REPORT_SUPPORTED_OPERATION_CODES command allows checking for support of commands that have the same opcode but different service actions, such as READ 32 and WRITE 32. However, the current implementation of scsi_report_opcode() only allows checking an operation code without a service action differentiation. Add the "sa" argument to scsi_report_opcode() to allow passing a service action. If a non-zero service action is specified, the reporting options field value is set to 3 to have the service action field taken into account by the device. If no service action field is specified (zero), the reporting options field is set to 1 as before. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-8-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c146cc807d44..c93c5aaf637e 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -433,8 +433,9 @@ extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); extern int scsi_get_vpd_page(struct scsi_device *, u8 page, unsigned char *buf, int buf_len); -extern int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, - unsigned int len, unsigned char opcode); +int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, + unsigned int len, unsigned char opcode, + unsigned short sa); extern int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state); extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, -- cgit v1.2.3 From 624885209f31eb9985bf51abe204ecbffe2fdeea Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 
2023 03:13:41 +0200 Subject: scsi: core: Detect support for command duration limits Introduce the function scsi_cdl_check() to detect if a device supports command duration limits (CDL). Support for the READ 16, WRITE 16, READ 32 and WRITE 32 commands is checked using the function scsi_report_opcode() to probe the rwcdlp and cdlp bits as they indicate the mode page defining the command duration limits descriptors that apply to the command being tested. If any of these commands support CDL, the field cdl_supported of struct scsi_device is set to 1 to indicate that the device supports CDL. Support for CDL for a device is advertised through sysfs using the new cdl_supported device attribute. This attribute value is 1 for a device supporting CDL and 0 otherwise. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-9-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c93c5aaf637e..6b8df9e253a0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -218,6 +218,8 @@ struct scsi_device { unsigned silence_suspend:1; /* Do not print runtime PM related messages */ unsigned no_vpd_size:1; /* No VPD size reported in header */ + unsigned cdl_supported:1; /* Command duration limits supported */ + unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ @@ -364,6 +366,7 @@ extern int scsi_register_device_handler(struct scsi_device_handler *scsi_dh); extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +void scsi_cdl_check(struct scsi_device *sdev); extern struct scsi_device 
*scsi_device_from_queue(struct request_queue *q); extern int __must_check scsi_device_get(struct scsi_device *); -- cgit v1.2.3 From 1b22cfb14142aba7742d307c4f8d7006f919308c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:42 +0200 Subject: scsi: core: Allow enabling and disabling command duration limits Add the sysfs scsi_device attribute cdl_enable to allow a user to enable or disable a device command duration limits feature. CDL is disabled by default. This feature must be explicitly enabled by a user by setting the cdl_enable attribute to 1. The new function scsi_cdl_enable() does not do anything beside setting the cdl_enable field of struct scsi_device in the case of a (real) SCSI device (e.g. a SAS HDD). For ATA devices, the command duration limits feature needs to be enabled/disabled using the ATA feature sub-page of the control mode page. To do so, the scsi_cdl_enable() function checks if this mode page is supported using scsi_mode_sense(). If it is, scsi_mode_select() is used to enable and disable CDL. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-10-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6b8df9e253a0..b2cdb078b7bd 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -219,6 +219,7 @@ struct scsi_device { unsigned no_vpd_size:1; /* No VPD size reported in header */ unsigned cdl_supported:1; /* Command duration limits supported */ + unsigned cdl_enable:1; /* Enable/disable Command duration limits */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ @@ -367,6 +368,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); void scsi_cdl_check(struct scsi_device *sdev); +int scsi_cdl_enable(struct scsi_device *sdev, bool enable); extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int __must_check scsi_device_get(struct scsi_device *); -- cgit v1.2.3 From 62e4a60e0cdb540b314061469e025fd834ff300c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:47 +0200 Subject: scsi: ata: libata: Detect support for command duration limits Use the supported capabilities identify device data log page to detect if a device supports the command duration limits feature. For devices supporting this feature, set the device flag ATA_DFLAG_CDL. To support SCSI-ATA translation, retrieve the command duration limits log page 18h and cache this page content using the cdl array added to the ata_device data structure. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-15-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- include/linux/ata.h | 5 ++++- include/linux/libata.h | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index c224dbddb9b2..1eda46b63dcc 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -322,15 +322,18 @@ enum { ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, + ATA_LOG_CDL = 0x18, + ATA_LOG_CDL_SIZE = ATA_SECT_SIZE, ATA_LOG_IDENTIFY_DEVICE = 0x30, ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ + ATA_LOG_SUPPORTED_CAPABILITIES = 0x03, ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - /* Identify device SATA settings log:*/ + /* Identify device SATA settings log: */ ATA_LOG_DEVSLP_OFFSET = 0x30, ATA_LOG_DEVSLP_SIZE = 0x08, ATA_LOG_DEVSLP_MDAT = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 311cd93377c7..e8a45f7f3f5c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -94,17 +94,18 @@ enum { ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ - ATA_DFLAG_CFG_MASK = (1 << 13) - 1, - - ATA_DFLAG_PIO = (1 << 13), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 14), /* device limited to non-NCQ mode */ - ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ - ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ - ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ - ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ - ATA_DFLAG_INIT_MASK = (1 << 19) - 1, - - ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 19), /* Priority cmds sent to dev */ + ATA_DFLAG_CDL = (1 << 13), /* supports cmd duration limits */ + ATA_DFLAG_CFG_MASK = (1 << 14) - 1, + + ATA_DFLAG_PIO = (1 << 14), /* device limited to PIO mode 
*/ + ATA_DFLAG_NCQ_OFF = (1 << 15), /* device limited to non-NCQ mode */ + ATA_DFLAG_SLEEPING = (1 << 16), /* device is sleeping */ + ATA_DFLAG_DUBIOUS_XFER = (1 << 17), /* data transfer not verified */ + ATA_DFLAG_NO_UNLOAD = (1 << 18), /* device doesn't support unload */ + ATA_DFLAG_UNLOCK_HPA = (1 << 19), /* unlock HPA */ + ATA_DFLAG_INIT_MASK = (1 << 20) - 1, + + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ @@ -115,7 +116,8 @@ enum { ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ - ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA), + ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA | \ + ATA_DFLAG_CDL), ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -709,6 +711,9 @@ struct ata_device { /* Concurrent positioning ranges */ struct ata_cpr_log *cpr_log; + /* Command Duration Limits log support */ + u8 cdl[ATA_LOG_CDL_SIZE]; + /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ -- cgit v1.2.3 From df60f9c64576d6d05b59ec5c34addcd61ef1efb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:50 +0200 Subject: scsi: ata: libata: Add ATA feature control sub-page translation Add support for the ATA feature control sub-page of the control mode page to enable/disable the command duration limits feature using the cdl_ctrl field of the ATA feature control sub-page. Both mode sense and mode select translation are supported. For mode sense, the ata device flag ATA_DFLAG_CDL_ENABLED is used to cache the status of the command duration limits feature. Enabling this feature is done using a SET FEATURES command with a cdl action set to 1 when the page cdl_ctrl field value is 0x2 (T2A and T2B pages supported). If this field is 0, CDL is disabled using the SET FEATURES command with a cdl action set to 0. 
Since a device CDL and NCQ priority features should not be used simultaneously, ata_mselect_control_ata_feature() returns an error when attempting to enable CDL with the device priority feature enabled. Conversely, the function ata_ncq_prio_enable_store() used to enable the use of the device NCQ priority feature through sysfs is modified to return an error if the device CDL feature is enabled. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-18-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/linux/ata.h | 3 +++ include/linux/libata.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index 1eda46b63dcc..21108471c6af 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -329,6 +329,7 @@ enum { /* Identify device log pages: */ ATA_LOG_SUPPORTED_CAPABILITIES = 0x03, + ATA_LOG_CURRENT_SETTINGS = 0x04, ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, @@ -418,6 +419,8 @@ enum { SETFEATURES_SATA_ENABLE = 0x10, /* Enable use of SATA feature */ SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ + SETFEATURES_CDL = 0x0d, /* Enable/disable cmd duration limits */ + /* SETFEATURE Sector counts for SATA features */ SATA_FPDMA_OFFSET = 0x01, /* FPDMA non-zero buffer offsets */ SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ diff --git a/include/linux/libata.h b/include/linux/libata.h index e8a45f7f3f5c..385ca23d5ad0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -106,6 +106,7 @@ enum { ATA_DFLAG_INIT_MASK = (1 << 20) - 1, ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ + ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention 
*/ -- cgit v1.2.3 From eafe804bda7ba01da562c43351068b8a76a579af Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:51 +0200 Subject: scsi: ata: libata: Set read/write commands CDL index For devices supporting the command duration limits feature, translate the dld field of read and write operations to set the command duration limit index field of the command task file when the duration limit feature is enabled. The function ata_set_tf_cdl() is introduced to do this. For unqueued (non NCQ) read and write operations, this function sets the command duration limit index in the lower 3 bits of the feature field. For queued NCQ read/write commands, the index is set as the lower 3 bits of the auxiliary field. The flag ATA_QCFLAG_HAS_CDL is introduced to indicate that a command taskfile has a non zero cdl field. Signed-off-by: Damien Le Moal Reviewed-by: Igor Pylypiv Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-19-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 385ca23d5ad0..f679abd2e61f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -209,6 +209,7 @@ enum { ATA_QCFLAG_CLEAR_EXCL = (1 << 5), /* clear excl_link on completion */ ATA_QCFLAG_QUIET = (1 << 6), /* don't report device error */ ATA_QCFLAG_RETRY = (1 << 7), /* retry after failure */ + ATA_QCFLAG_HAS_CDL = (1 << 8), /* qc has CDL a descriptor set */ ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ -- cgit v1.2.3 From 18bd7718b5c489b3161b6c2ab4685d57c1e2da3b Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:52 +0200 Subject: scsi: ata: libata: Handle completion of CDL commands using policy 0xD A CDL timeout for policy 0xF is defined as a NCQ error, just with a CDL specific sk/asc/ascq in the sense data. Therefore, the existing code in libata does not need to be modified to handle a policy 0xF CDL timeout. For Command Duration Limits policy 0xD: The device shall complete the command without error with the additional sense code set to DATA CURRENTLY UNAVAILABLE. Since a CDL timeout for policy 0xD is not an error, we cannot use the NCQ Command Error log (10h). Instead, we need to read the Sense Data for Successful NCQ Commands log (0Fh). In the success case, just like in the error case, we cannot simply read a log page from the interrupt handler itself, since reading a log page involves sending a READ LOG DMA EXT or READ LOG EXT command. Therefore, we add a new EH action ATA_EH_GET_SUCCESS_SENSE. When a command completes without error, and when the ATA_SENSE bit is set, this new action is set as pending, and EH is scheduled. This way, similar to the NCQ error case, the log page will be read from EH context. An alternative would have been to add a new kthread or workqueue to handle this. 
However, extending EH can be done with minimal changes and avoids the need to synchronize a new kthread/workqueue with EH. Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-20-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/linux/ata.h | 3 +++ include/linux/libata.h | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index 21108471c6af..792e10a09787 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -325,6 +325,8 @@ enum { ATA_LOG_CDL = 0x18, ATA_LOG_CDL_SIZE = ATA_SECT_SIZE, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_SENSE_NCQ = 0x0F, + ATA_LOG_SENSE_NCQ_SIZE = ATA_SECT_SIZE * 2, ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ @@ -431,6 +433,7 @@ enum { SATA_DEVSLP = 0x09, /* Device Sleep */ SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ + SETFEATURE_SENSE_DATA_SUCC_NCQ = 0xC4, /* Sense Data for successful NCQ commands */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index f679abd2e61f..5c8ef33b0af2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -214,6 +214,7 @@ enum { ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ + ATA_QCFLAG_EH_SUCCESS_CMD = (1 << 19), /* EH should fetch sense for this successful cmd */ /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ @@ -312,8 +313,10 @@ enum { ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ + ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get 
sense data for successful cmd */ - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | + ATA_EH_GET_SUCCESS_SENSE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -867,6 +870,7 @@ struct ata_port { struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif /* owned by EH */ + u8 *ncq_sense_buf; u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; @@ -1185,6 +1189,7 @@ extern int sata_link_hardreset(struct ata_link *link, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned long * @@ -1222,6 +1227,10 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } +static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) +{ + return -EOPNOTSUPP; +} static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, -- cgit v1.2.3 From 8bb1c6243c4ba397958fe67837e075bd1bb8d3b4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 18 May 2023 12:31:58 -0700 Subject: scsi: core: Trace SCSI sense data If a command fails, SCSI sense data is essential to determine why it failed. Hence make the sense key, ASC and ASCQ codes available in the ftrace output. Cc: Niklas Cassel Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230518193159.1166304-3-bvanassche@acm.org Reviewed-by: Ming Lei Reviewed-by: Niklas Cassel Signed-off-by: Martin K. 
Petersen --- include/trace/events/scsi.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h index a2c7befd451a..8e2d9b1b0e77 100644 --- a/include/trace/events/scsi.h +++ b/include/trace/events/scsi.h @@ -269,9 +269,14 @@ DECLARE_EVENT_CLASS(scsi_cmd_done_timeout_template, __field( unsigned int, prot_sglen ) __field( unsigned char, prot_op ) __dynamic_array(unsigned char, cmnd, cmd->cmd_len) + __field( u8, sense_key ) + __field( u8, asc ) + __field( u8, ascq ) ), TP_fast_assign( + struct scsi_sense_hdr sshdr; + __entry->host_no = cmd->device->host->host_no; __entry->channel = cmd->device->channel; __entry->id = cmd->device->id; @@ -285,11 +290,22 @@ DECLARE_EVENT_CLASS(scsi_cmd_done_timeout_template, __entry->prot_sglen = scsi_prot_sg_count(cmd); __entry->prot_op = scsi_get_prot_op(cmd); memcpy(__get_dynamic_array(cmnd), cmd->cmnd, cmd->cmd_len); + if (cmd->sense_buffer && SCSI_SENSE_VALID(cmd) && + scsi_command_normalize_sense(cmd, &sshdr)) { + __entry->sense_key = sshdr.sense_key; + __entry->asc = sshdr.asc; + __entry->ascq = sshdr.ascq; + } else { + __entry->sense_key = 0; + __entry->asc = 0; + __entry->ascq = 0; + } ), TP_printk("host_no=%u channel=%u id=%u lun=%u data_sgl=%u prot_sgl=%u " \ "prot_op=%s driver_tag=%d scheduler_tag=%d cmnd=(%s %s raw=%s) " \ - "result=(driver=%s host=%s message=%s status=%s)", + "result=(driver=%s host=%s message=%s status=%s) " + "sense=(key=%#x asc=%#x ascq=%#x)", __entry->host_no, __entry->channel, __entry->id, __entry->lun, __entry->data_sglen, __entry->prot_sglen, show_prot_op_name(__entry->prot_op), __entry->driver_tag, @@ -299,7 +315,8 @@ DECLARE_EVENT_CLASS(scsi_cmd_done_timeout_template, "DRIVER_OK", show_hostbyte_name(((__entry->result) >> 16) & 0xff), "COMMAND_COMPLETE", - show_statusbyte_name(__entry->result & 0xff)) + show_statusbyte_name(__entry->result & 0xff), + __entry->sense_key, __entry->asc, 
__entry->ascq) ); DEFINE_EVENT(scsi_cmd_done_timeout_template, scsi_dispatch_cmd_done, -- cgit v1.2.3 From b125bb99559e3639764b8d169e3e9b80858fa2af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 29 May 2023 13:26:37 -0700 Subject: scsi: core: Support setting BLK_MQ_F_BLOCKING Prepare for adding code in ufshcd_queuecommand() that may sleep. This patch is similar to a patch posted last year by Mike Christie. See also https://lore.kernel.org/all/20220308003957.123312-2-michael.christie@oracle.com/ Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230529202640.11883-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 0f29799efa02..70b7475dcf56 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -458,6 +458,9 @@ struct scsi_host_template { /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; + /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ + unsigned queuecommand_may_block:1; + /* * Countdown for host blocking with no commands outstanding. */ @@ -653,6 +656,9 @@ struct Scsi_Host { /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; + /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ + unsigned queuecommand_may_block:1; + /* Host responded with short (<36 bytes) INQUIRY result */ unsigned short_inquiry:1; -- cgit v1.2.3 From 078f4f4b34d6c2dadabb363d3fc6c84b32927dea Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 29 May 2023 13:26:40 -0700 Subject: scsi: ufs: Ungate the clock synchronously Ungating the clock asynchronously causes ufshcd_queuecommand() to return SCSI_MLQUEUE_HOST_BUSY and hence causes commands to be requeued. This is suboptimal. Allow ufshcd_queuecommand() to sleep such that clock ungating does not trigger command requeuing. 
Remove the ufshcd_scsi_block_requests() and ufshcd_scsi_unblock_requests() calls because these are no longer needed. The flush_work(&hba->clk_gating.ungate_work) call is sufficient to make the SCSI core wait for clock ungating to complete. Acked-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230529202640.11883-6-bvanassche@acm.org Reviewed-by: Bean Huo Reviewed-by: Bao D. Nguyen Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index f7553293ba98..8039c2b72502 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1358,7 +1358,7 @@ void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, u8 **buf, bool ascii); -int ufshcd_hold(struct ufs_hba *hba, bool async); +void ufshcd_hold(struct ufs_hba *hba); void ufshcd_release(struct ufs_hba *hba); void ufshcd_clkgate_delay_set(struct device *dev, unsigned long value); -- cgit v1.2.3 From 23caa33d36e7e6f75597b333634d9e54fb40001b Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 31 May 2023 10:00:09 +0300 Subject: scsi: ufs: core: Do not open code SZ_x Do not open code SZ_x. Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20230531070009.4593-1-avri.altman@wdc.com Reviewed-by: Bean Huo Reviewed-by: Stanley Chu Reviewed-by: Keoseong Park Signed-off-by: Martin K. 
Petersen --- include/ufs/ufshci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 11424bb03814..db2d5db5c88e 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -453,7 +453,7 @@ enum { }; /* The maximum length of the data byte count field in the PRDT is 256KB */ -#define PRDT_DATA_BYTE_COUNT_MAX (256 * 1024) +#define PRDT_DATA_BYTE_COUNT_MAX SZ_256K /* The granularity of the data byte count field in the PRDT is 32-bit */ #define PRDT_DATA_BYTE_COUNT_PAD 4 -- cgit v1.2.3 From a8f9a36e46344ea5bdc301c2fde0389a463bf0a3 Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Mon, 29 May 2023 15:12:20 -0700 Subject: scsi: ufs: core: Combine 32-bit command_desc_base_addr_lo/hi The UTP command descriptor base address is a 57-bit field in the UTP transfer request descriptor. Combine the two 32-bit command_desc_base_addr_lo/hi fields into a 64-bit for better handling of this field. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/4e6f7f5a15000cdae77c3014b477264f57bf572c.1685396241.git.quic_nguyenb@quicinc.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Tested-by: Stanley Chu Reviewed-by: Can Guo Signed-off-by: Martin K. 
Petersen --- include/ufs/ufshci.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 11424bb03814..7c5a76b2c70a 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -503,8 +503,7 @@ struct request_desc_header { /** * struct utp_transfer_req_desc - UTP Transfer Request Descriptor (UTRD) * @header: UTRD header DW-0 to DW-3 - * @command_desc_base_addr_lo: UCD base address low DW-4 - * @command_desc_base_addr_hi: UCD base address high DW-5 + * @command_desc_base_addr: UCD base address DW 4-5 * @response_upiu_length: response UPIU length DW-6 * @response_upiu_offset: response UPIU offset DW-6 * @prd_table_length: Physical region descriptor length DW-7 @@ -516,8 +515,7 @@ struct utp_transfer_req_desc { struct request_desc_header header; /* DW 4-5*/ - __le32 command_desc_base_addr_lo; - __le32 command_desc_base_addr_hi; + __le64 command_desc_base_addr; /* DW 6 */ __le16 response_upiu_length; -- cgit v1.2.3 From 8d7290348992f27242dd6a696fa2eede709f0b14 Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Mon, 29 May 2023 15:12:22 -0700 Subject: scsi: ufs: mcq: Add supporting functions for MCQ abort Add supporting functions to handle UFS abort in MCQ mode. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/d452c5ad62dc863cc067ec82daa0885ec98bd508.1685396241.git.quic_nguyenb@quicinc.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Tested-by: Stanley Chu Reviewed-by: Can Guo Signed-off-by: Martin K. 
Petersen --- include/ufs/ufshcd.h | 3 +++ include/ufs/ufshci.h | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index f7553293ba98..145710e9c2a5 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1087,6 +1087,7 @@ struct ufs_hba { * @cq_tail_slot: current slot to which CQ tail pointer is pointing * @cq_head_slot: current slot to which CQ head pointer is pointing * @cq_lock: Synchronize between multiple polling instances + * @sq_mutex: prevent submission queue concurrent access */ struct ufs_hw_queue { void __iomem *mcq_sq_head; @@ -1105,6 +1106,8 @@ struct ufs_hw_queue { u32 cq_tail_slot; u32 cq_head_slot; spinlock_t cq_lock; + /* prevent concurrent access to submission queue */ + struct mutex sq_mutex; }; static inline bool is_mcq_enabled(struct ufs_hba *hba) diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 7c5a76b2c70a..9d291ca7f31d 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -99,6 +99,9 @@ enum { enum { REG_SQHP = 0x0, REG_SQTP = 0x4, + REG_SQRTC = 0x8, + REG_SQCTI = 0xC, + REG_SQRTS = 0x10, }; enum { @@ -111,12 +114,26 @@ enum { REG_CQIE = 0x4, }; +enum { + SQ_START = 0x0, + SQ_STOP = 0x1, + SQ_ICU = 0x2, +}; + +enum { + SQ_STS = 0x1, + SQ_CUS = 0x2, +}; + +#define SQ_ICU_ERR_CODE_MASK GENMASK(7, 4) +#define UPIU_COMMAND_TYPE_MASK GENMASK(31, 28) #define UFS_MASK(mask, offset) ((mask) << (offset)) /* UFS Version 08h */ #define MINOR_VERSION_NUM_MASK UFS_MASK(0xFFFF, 0) #define MAJOR_VERSION_NUM_MASK UFS_MASK(0xFFFF, 16) +#define UFSHCD_NUM_RESERVED 1 /* * Controller UFSHCI version * - 2.x and newer use the following scheme: -- cgit v1.2.3 From 57d6ef4601c0b7975aab5144c7c3760846362e1c Mon Sep 17 00:00:00 2001 From: "Bao D. 
Nguyen" Date: Mon, 29 May 2023 15:12:25 -0700 Subject: scsi: ufs: mcq: Use ufshcd_mcq_poll_cqe_lock() in MCQ mode In preparation for adding MCQ error handler support, update the MCQ code to use the ufshcd_mcq_poll_cqe_lock() in interrupt context instead of using ufshcd_mcq_poll_cqe_nolock(). This is to keep synchronization between MCQ interrupt and error handler contexts because both need to access the MCQ hardware in separate contexts. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/6ae727ad2a4040469b8f0632b55e0577d80da11b.1685396241.git.quic_nguyenb@quicinc.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Tested-by: Stanley Chu Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 145710e9c2a5..12e3149617db 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1243,7 +1243,7 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); -unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, +unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, struct ufs_hw_queue *hwq); void ufshcd_mcq_enable_esi(struct ufs_hba *hba); void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg); -- cgit v1.2.3 From 0818a6903c8081a17da4b1f50ff156537f99b02f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 24 May 2023 13:36:22 -0700 Subject: scsi: ufs: core: Simplify driver shutdown All UFS host drivers call ufshcd_shutdown(). Hence, instead of calling ufshcd_shutdown() from the host driver .shutdown() callback, inline that function into ufshcd_wl_shutdown(). 
Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230524203659.1394307-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index f7553293ba98..db2e669985d5 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1277,7 +1277,6 @@ extern int ufshcd_system_freeze(struct device *dev); extern int ufshcd_system_thaw(struct device *dev); extern int ufshcd_system_restore(struct device *dev); #endif -extern int ufshcd_shutdown(struct ufs_hba *hba); extern int ufshcd_dme_configure_adapt(struct ufs_hba *hba, int agreed_gear, -- cgit v1.2.3 From 01584c1e233740519d0e11aa20daa323d26bf598 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 8 Jun 2023 18:55:56 +0900 Subject: scsi: block: Improve ioprio value validity checks The introduction of the macro IOPRIO_PRIO_LEVEL() in commit eca2040972b4 ("scsi: block: ioprio: Clean up interface definition") results in an iopriority level always being masked using the macro IOPRIO_LEVEL_MASK, and thus in the kernel always seeing an acceptable value for an I/O priority level when checked in ioprio_check_cap(). Before this patch, this function would return an error for some (but not all) invalid values for a level valid range of [0..7]. Restore and improve the detection of invalid priority levels by introducing the inline function ioprio_value() to check an ioprio class, level and hint value before combining these fields into a single value to be used with ioprio_set() or AIOs. If an invalid value for the class, level or hint of an ioprio is detected, ioprio_value() returns an ioprio using the class IOPRIO_CLASS_INVALID, indicating an invalid value and causing ioprio_check_cap() to return -EINVAL. 
Fixes: 6c913257226a ("scsi: block: Introduce ioprio hints") Fixes: eca2040972b4 ("scsi: block: ioprio: Clean up interface definition") Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230608095556.124001-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Reviewed-by: Linus Walleij Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 50 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 4c4806e8230b..99440b2e8c35 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -2,19 +2,20 @@ #ifndef _UAPI_LINUX_IOPRIO_H #define _UAPI_LINUX_IOPRIO_H +#include +#include + /* * Gives us 8 prio classes with 13-bits of data for each class */ #define IOPRIO_CLASS_SHIFT 13 -#define IOPRIO_CLASS_MASK 0x07 +#define IOPRIO_NR_CLASSES 8 +#define IOPRIO_CLASS_MASK (IOPRIO_NR_CLASSES - 1) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(ioprio) \ (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK) #define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) \ - ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ - ((data) & IOPRIO_PRIO_MASK)) /* * These are the io priority classes as implemented by the BFQ and mq-deadline @@ -25,10 +26,13 @@ * served when no one else is using the disk. */ enum { - IOPRIO_CLASS_NONE, - IOPRIO_CLASS_RT, - IOPRIO_CLASS_BE, - IOPRIO_CLASS_IDLE, + IOPRIO_CLASS_NONE = 0, + IOPRIO_CLASS_RT = 1, + IOPRIO_CLASS_BE = 2, + IOPRIO_CLASS_IDLE = 3, + + /* Special class to indicate an invalid ioprio value */ + IOPRIO_CLASS_INVALID = 7, }; /* @@ -73,15 +77,6 @@ enum { #define IOPRIO_PRIO_HINT(ioprio) \ (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) -/* - * Alternate macro for IOPRIO_PRIO_VALUE() to define an I/O priority with - * a class, level and hint. 
- */ -#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ - ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ - (((hint) & IOPRIO_HINT_MASK) << IOPRIO_HINT_SHIFT) | \ - ((level) & IOPRIO_LEVEL_MASK)) - /* * I/O hints. */ @@ -107,4 +102,25 @@ enum { IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, }; +#define IOPRIO_BAD_VALUE(val, max) ((val) < 0 || (val) >= (max)) + +/* + * Return an I/O priority value based on a class, a level and a hint. + */ +static __always_inline __u16 ioprio_value(int class, int level, int hint) +{ + if (IOPRIO_BAD_VALUE(class, IOPRIO_NR_CLASSES) || + IOPRIO_BAD_VALUE(level, IOPRIO_NR_LEVELS) || + IOPRIO_BAD_VALUE(hint, IOPRIO_NR_HINTS)) + return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT; + + return (class << IOPRIO_CLASS_SHIFT) | + (hint << IOPRIO_HINT_SHIFT) | level; +} + +#define IOPRIO_PRIO_VALUE(class, level) \ + ioprio_value(class, level, IOPRIO_HINT_NONE) +#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ + ioprio_value(class, level, hint) + #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 0fef6bb730c490fcdc4347dbd21646d3ffe62cf5 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sat, 10 Jun 2023 10:15:51 +0800 Subject: scsi: ufs: core: mcq: Fix the incorrect OCS value for the device command In MCQ mode, when a device command uses a hardware queue shared with other commands, a race condition may occur in the following scenario: 1. A device command is completed in CQx with CQE entry "e". 2. The interrupt handler copies the "cqe" pointer to "hba->dev_cmd.cqe" and completes "hba->dev_cmd.complete". 3. The "ufshcd_wait_for_dev_cmd()" function is awakened and retrieves the OCS value from "hba->dev_cmd.cqe". However, there is a possibility that the CQE entry "e" will be overwritten by newly completed commands in CQx, resulting in an incorrect OCS value being received by "ufshcd_wait_for_dev_cmd()". To avoid this race condition, the OCS value should be immediately copied to the struct "lrb" of the device command. 
Then "ufshcd_wait_for_dev_cmd()" can retrieve the OCS value from the struct "lrb". Fixes: 57b1c0ef89ac ("scsi: ufs: core: mcq: Add support to allocate multiple queues") Suggested-by: Can Guo Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20230610021553.1213-2-powen.kao@mediatek.com Tested-by: Po-Wen Kao Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d65c9d07694d..92f073bda405 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -225,7 +225,6 @@ struct ufs_dev_cmd { struct mutex lock; struct completion *complete; struct ufs_query query; - struct cq_entry *cqe; }; /** -- cgit v1.2.3 From c4ad4f2e6646dcd29a1ff7ff682bf650a67b0335 Mon Sep 17 00:00:00 2001 From: Po-Wen Kao Date: Mon, 12 Jun 2023 16:58:09 +0800 Subject: scsi: ufs: core: Add host quirk UFSHCD_QUIRK_MCQ_BROKEN_INTR Quirk UFSHCD_QUIRK_MCQ_BROKEN_INTR is introduced for hosts that implement a different interrupt topology from the UFSHCI 4.0 spec. Some hosts raise per hw queue interrupt in addition to CQES (traditional) when ESI is disabled. Enabling this quirk will disable CQES and use only per hw queue interrupt. Signed-off-by: Po-Wen Kao Link: https://lore.kernel.org/r/20230612085817.12275-2-powen.kao@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 92f073bda405..5dc37e47f399 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -610,6 +610,13 @@ enum ufshcd_quirks { * to reinit the device after switching to maximum gear. */ UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH = 1 << 19, + + /* + * Some host raises interrupt (per queue) in addition to + * CQES (traditional) when ESI is disabled. 
+ * Enable this quirk will disable CQES and use per queue interrupt. + */ + UFSHCD_QUIRK_MCQ_BROKEN_INTR = 1 << 20, }; enum ufshcd_caps { -- cgit v1.2.3 From aa9d5d0015a8b73aa557ab45933efe9cb68a3784 Mon Sep 17 00:00:00 2001 From: Po-Wen Kao Date: Mon, 12 Jun 2023 16:58:10 +0800 Subject: scsi: ufs: core: Add host quirk UFSHCD_QUIRK_MCQ_BROKEN_RTC Some hosts do not implement SQ Run Time Command (SQRTC) register, thus we need this quirk to skip the related flow. Signed-off-by: Po-Wen Kao Link: https://lore.kernel.org/r/20230612085817.12275-3-powen.kao@mediatek.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 5dc37e47f399..9674094d623d 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -617,6 +617,12 @@ enum ufshcd_quirks { * Enable this quirk will disable CQES and use per queue interrupt. */ UFSHCD_QUIRK_MCQ_BROKEN_INTR = 1 << 20, + + /* + * Some host does not implement SQ Run Time Command (SQRTC) register + * thus need this quirk to skip related flow. + */ + UFSHCD_QUIRK_MCQ_BROKEN_RTC = 1 << 21, }; enum ufshcd_caps { -- cgit v1.2.3 From 31950192d939a969415d0e1da4c62598023b0850 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 14 Jun 2023 12:36:15 +0200 Subject: scsi: core: Replace scsi_target_block() with scsi_block_targets() All callers (fc_remote_port_delete(), __iscsi_block_session(), __srp_start_tl_fail_timers(), srp_reconnect_rport(), snic_tgt_del()) pass parent devices of scsi_target devices to scsi_target_block(). Rename the function to scsi_block_targets(), and simplify it by assuming that it is always passed a parent device. Also, have callers pass the Scsi_Host pointer to scsi_block_targets(), as every caller has this pointer readily available. 
Suggested-by: Christoph Hellwig Suggested-by: Bart Van Assche Signed-off-by: Martin Wilck Link: https://lore.kernel.org/r/20230614103616.31857-7-mwilck@suse.com Cc: Karan Tilak Kumar Cc: Sesidhar Baddela Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index f10a008e5bfa..8bd5b00b33cc 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -450,7 +450,7 @@ extern void scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan); extern void scsi_target_reap(struct scsi_target *); -extern void scsi_target_block(struct device *); +void scsi_block_targets(struct Scsi_Host *shost, struct device *dev); extern void scsi_target_unblock(struct device *, enum scsi_device_state); extern void scsi_remove_target(struct device *); extern const char *scsi_device_state_name(enum scsi_device_state); -- cgit v1.2.3