diff options
author | Matthew Sakai <msakai@redhat.com> | 2023-11-16 19:46:10 -0500 |
---|---|---|
committer | Mike Snitzer <snitzer@kernel.org> | 2024-02-20 13:43:13 -0500 |
commit | 4fcb4290dfe303de048fe01968513da6f4512147 (patch) | |
tree | c10d6a2d8b6e061d156c3c802849eb86ebfe1cd7 | |
parent | 03d1089e1dc7f05afb2bfc76bffc71de17e4297e (diff) | |
download | linux-stable-4fcb4290dfe303de048fe01968513da6f4512147.tar.gz linux-stable-4fcb4290dfe303de048fe01968513da6f4512147.tar.bz2 linux-stable-4fcb4290dfe303de048fe01968513da6f4512147.zip |
dm vdo: add vdo type declarations, constants, and simple data structures
Add definitions of constants defining the fixed parameters of a VDO
volume, and the default and maximum values of configurable or dynamic
parameters.
Add definitions of internal status codes used for internal
communication within the module and for logging.
Add definitions of types and structs used to manage the processing of
an I/O operation.
Co-developed-by: J. corwin Coburn <corwin@hurlbutnet.net>
Signed-off-by: J. corwin Coburn <corwin@hurlbutnet.net>
Co-developed-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
-rw-r--r-- | drivers/md/dm-vdo/constants.c | 15 | ||||
-rw-r--r-- | drivers/md/dm-vdo/constants.h | 102 | ||||
-rw-r--r-- | drivers/md/dm-vdo/status-codes.c | 123 | ||||
-rw-r--r-- | drivers/md/dm-vdo/status-codes.h | 112 | ||||
-rw-r--r-- | drivers/md/dm-vdo/types.h | 393 | ||||
-rw-r--r-- | drivers/md/dm-vdo/wait-queue.c | 225 | ||||
-rw-r--r-- | drivers/md/dm-vdo/wait-queue.h | 131 |
7 files changed, 1101 insertions, 0 deletions
diff --git a/drivers/md/dm-vdo/constants.c b/drivers/md/dm-vdo/constants.c new file mode 100644 index 000000000000..14ac047101f8 --- /dev/null +++ b/drivers/md/dm-vdo/constants.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Red Hat + */ + +#include "types.h" + +/* The maximum logical space is 4 petabytes, which is 1 terablock. */ +const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024; + +/* The maximum physical space is 256 terabytes, which is 64 gigablocks. */ +const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64; + +/* unit test minimum */ +const block_count_t MINIMUM_VDO_SLAB_JOURNAL_BLOCKS = 2; diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h new file mode 100644 index 000000000000..9ae3917d00de --- /dev/null +++ b/drivers/md/dm-vdo/constants.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2023 Red Hat + */ + +#ifndef VDO_CONSTANTS_H +#define VDO_CONSTANTS_H + +#include <linux/blkdev.h> + +#include "types.h" + +enum { + /* + * The maximum number of contiguous PBNs which will go to a single bio submission queue, + * assuming there is more than one queue. + */ + VDO_BIO_ROTATION_INTERVAL_LIMIT = 1024, + + /** The number of entries on a block map page */ + VDO_BLOCK_MAP_ENTRIES_PER_PAGE = 812, + + /** The origin of the flat portion of the block map */ + VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN = 1, + + /* + * The height of a block map tree. Assuming a root count of 60 and 812 entries per page, + * this is big enough to represent almost 95 PB of logical space. + */ + VDO_BLOCK_MAP_TREE_HEIGHT = 5, + + /** The default number of bio submission queues. */ + DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT = 4, + + /** The number of contiguous PBNs to be submitted to a single bio queue. */ + DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL = 64, + + /** The number of trees in the arboreal block map */ + DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT = 60, + + /** The default size of the recovery journal, in blocks */ + DEFAULT_VDO_RECOVERY_JOURNAL_SIZE = 32 * 1024, + + /** The default size of each slab journal, in blocks */ + DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224, + + /* + * The initial size of lbn_operations and pbn_operations, which is based upon the expected + * maximum number of outstanding VIOs. This value was chosen to make it highly unlikely + * that the maps would need to be resized. + */ + VDO_LOCK_MAP_CAPACITY = 10000, + + /** The maximum number of logical zones */ + MAX_VDO_LOGICAL_ZONES = 60, + + /** The maximum number of physical zones */ + MAX_VDO_PHYSICAL_ZONES = 16, + + /** The base-2 logarithm of the maximum blocks in one slab */ + MAX_VDO_SLAB_BITS = 23, + + /** The maximum number of slabs the slab depot supports */ + MAX_VDO_SLABS = 8192, + + /* + * The maximum number of block map pages to load simultaneously during recovery or rebuild. + */ + MAXIMUM_SIMULTANEOUS_VDO_BLOCK_MAP_RESTORATION_READS = 1024, + + /** The maximum number of entries in the slab summary */ + MAXIMUM_VDO_SLAB_SUMMARY_ENTRIES = MAX_VDO_SLABS * MAX_VDO_PHYSICAL_ZONES, + + /** The maximum number of total threads in a VDO thread configuration. */ + MAXIMUM_VDO_THREADS = 100, + + /** The maximum number of VIOs in the system at once */ + MAXIMUM_VDO_USER_VIOS = 2048, + + /** The only physical block size supported by VDO */ + VDO_BLOCK_SIZE = 4096, + + /** The number of sectors per block */ + VDO_SECTORS_PER_BLOCK = (VDO_BLOCK_SIZE >> SECTOR_SHIFT), + + /** The size of a sector that will not be torn */ + VDO_SECTOR_SIZE = 512, + + /** The physical block number reserved for storing the zero block */ + VDO_ZERO_BLOCK = 0, +}; + +/** The maximum logical space is 4 petabytes, which is 1 terablock. */ +extern const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS; + +/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */ +extern const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS; + +/** unit test minimum */ +extern const block_count_t MINIMUM_VDO_SLAB_JOURNAL_BLOCKS; + +#endif /* VDO_CONSTANTS_H */ diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c new file mode 100644 index 000000000000..33b1de29b205 --- /dev/null +++ b/drivers/md/dm-vdo/status-codes.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Red Hat + */ + +#include "status-codes.h" + +#include "errors.h" +#include "logger.h" +#include "permassert.h" +#include "uds-threads.h" + +const struct error_info vdo_status_list[] = { + { "VDO_NOT_IMPLEMENTED", "Not implemented" }, + { "VDO_OUT_OF_RANGE", "Out of range" }, + { "VDO_REF_COUNT_INVALID", "Reference count would become invalid" }, + { "VDO_NO_SPACE", "Out of space" }, + { "VDO_UNEXPECTED_EOF", "Unexpected EOF on block read" }, + { "VDO_BAD_CONFIGURATION", "Bad configuration option" }, + { "VDO_SOCKET_ERROR", "Socket error" }, + { "VDO_BAD_ALIGNMENT", "Mis-aligned block reference" }, + { "VDO_COMPONENT_BUSY", "Prior operation still in progress" }, + { "VDO_BAD_PAGE", "Corrupt or incorrect page" }, + { "VDO_UNSUPPORTED_VERSION", "Unsupported component version" }, + { "VDO_INCORRECT_COMPONENT", "Component id mismatch in decoder" }, + { "VDO_PARAMETER_MISMATCH", "Parameters have conflicting values" }, + { "VDO_BLOCK_SIZE_TOO_SMALL", "The block size is too small" }, + { "VDO_UNKNOWN_PARTITION", "No partition exists with a given id" }, + { "VDO_PARTITION_EXISTS", "A partition already exists with a given id" }, + { "VDO_NOT_READ_ONLY", "The device is not in read-only mode" }, + { "VDO_INCREMENT_TOO_SMALL", "Physical block growth of too few blocks" }, + { "VDO_CHECKSUM_MISMATCH", "Incorrect checksum" }, + { "VDO_RECOVERY_JOURNAL_FULL", "The recovery journal is full" }, + { "VDO_LOCK_ERROR", "A lock is held incorrectly" }, + { "VDO_READ_ONLY", "The device is in read-only mode" }, + { "VDO_SHUTTING_DOWN", "The device is shutting down" }, + { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" }, + { "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" }, + { "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" }, + { "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" }, + { "VDO_UNKNOWN_COMMAND", "The extended command is not known" }, + { "VDO_COMMAND_ERROR", "Bad extended command parameters" }, + { "VDO_CANNOT_DETERMINE_SIZE", "Cannot determine config sizes to fit" }, + { "VDO_BAD_MAPPING", "Invalid page mapping" }, + { "VDO_READ_CACHE_BUSY", "Read cache has no free slots" }, + { "VDO_BIO_CREATION_FAILED", "Bio creation failed" }, + { "VDO_BAD_MAGIC", "Bad magic number" }, + { "VDO_BAD_NONCE", "Bad nonce" }, + { "VDO_JOURNAL_OVERFLOW", "Journal sequence number overflow" }, + { "VDO_INVALID_ADMIN_STATE", "Invalid operation for current state" }, + { "VDO_CANT_ADD_SYSFS_NODE", "Failed to add sysfs node" }, +}; + +static atomic_t vdo_status_codes_registered = ATOMIC_INIT(0); +static int status_code_registration_result; + +static void do_status_code_registration(void) +{ + int result; + + BUILD_BUG_ON((VDO_STATUS_CODE_LAST - VDO_STATUS_CODE_BASE) != + ARRAY_SIZE(vdo_status_list)); + + result = uds_register_error_block("VDO Status", VDO_STATUS_CODE_BASE, + VDO_STATUS_CODE_BLOCK_END, vdo_status_list, + sizeof(vdo_status_list)); + /* + * The following test handles cases where libvdo is statically linked against both the test + * modules and the test driver (because multiple instances of this module call their own + * copy of this function once each, resulting in multiple calls to register_error_block + * which is shared in libuds). + */ + if (result == UDS_DUPLICATE_NAME) + result = UDS_SUCCESS; + + status_code_registration_result = (result == UDS_SUCCESS) ? VDO_SUCCESS : result; +} + +/** + * vdo_register_status_codes() - Register the VDO status codes if needed. + * Return: A success or error code. + */ +int vdo_register_status_codes(void) +{ + uds_perform_once(&vdo_status_codes_registered, do_status_code_registration); + return status_code_registration_result; +} + +/** + * vdo_map_to_system_error() - Given an error code, return a value we can return to the OS. + * @error: The error code to convert. + * + * The input error code may be a system-generated value (such as -EIO), an errno macro used in our + * code (such as EIO), or a UDS or VDO status code; the result must be something the rest of the OS + * can consume (negative errno values such as -EIO, in the case of the kernel). + * + * Return: A system error code value. + */ +int vdo_map_to_system_error(int error) +{ + char error_name[UDS_MAX_ERROR_NAME_SIZE]; + char error_message[UDS_MAX_ERROR_MESSAGE_SIZE]; + + /* 0 is success, negative a system error code */ + if (likely(error <= 0)) + return error; + if (error < 1024) + return -error; + + /* VDO or UDS error */ + switch (error) { + case VDO_NO_SPACE: + return -ENOSPC; + case VDO_READ_ONLY: + return -EIO; + default: + uds_log_info("%s: mapping internal status code %d (%s: %s) to EIO", + __func__, error, + uds_string_error_name(error, error_name, sizeof(error_name)), + uds_string_error(error, error_message, sizeof(error_message))); + return -EIO; + } +} diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h new file mode 100644 index 000000000000..c5da37ec8b55 --- /dev/null +++ b/drivers/md/dm-vdo/status-codes.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2023 Red Hat + */ + +#ifndef VDO_STATUS_CODES_H +#define VDO_STATUS_CODES_H + +#include "errors.h" + +enum { + UDS_BLOCK_SIZE = UDS_ERROR_CODE_BLOCK_END - UDS_ERROR_CODE_BASE, + VDO_BLOCK_START = UDS_ERROR_CODE_BLOCK_END, + VDO_BLOCK_END = VDO_BLOCK_START + UDS_BLOCK_SIZE, + PRP_BLOCK_START = VDO_BLOCK_END, + PRP_BLOCK_END = PRP_BLOCK_START + UDS_BLOCK_SIZE, +}; + +/* VDO-specific status codes. */ +enum vdo_status_codes { + /* successful result */ + VDO_SUCCESS, + /* base of all VDO errors */ + VDO_STATUS_CODE_BASE = VDO_BLOCK_START, + /* we haven't written this yet */ + VDO_NOT_IMPLEMENTED = VDO_STATUS_CODE_BASE, + /* input out of range */ + VDO_OUT_OF_RANGE, + /* an invalid reference count would result */ + VDO_REF_COUNT_INVALID, + /* a free block could not be allocated */ + VDO_NO_SPACE, + /* unexpected EOF on block read */ + VDO_UNEXPECTED_EOF, + /* improper or missing configuration option */ + VDO_BAD_CONFIGURATION, + /* socket opening or binding problem */ + VDO_SOCKET_ERROR, + /* read or write on non-aligned offset */ + VDO_BAD_ALIGNMENT, + /* prior operation still in progress */ + VDO_COMPONENT_BUSY, + /* page contents incorrect or corrupt data */ + VDO_BAD_PAGE, + /* unsupported version of some component */ + VDO_UNSUPPORTED_VERSION, + /* component id mismatch in decoder */ + VDO_INCORRECT_COMPONENT, + /* parameters have conflicting values */ + VDO_PARAMETER_MISMATCH, + /* the block size is too small */ + VDO_BLOCK_SIZE_TOO_SMALL, + /* no partition exists with a given id */ + VDO_UNKNOWN_PARTITION, + /* a partition already exists with a given id */ + VDO_PARTITION_EXISTS, + /* the VDO is not in read-only mode */ + VDO_NOT_READ_ONLY, + /* physical block growth of too few blocks */ + VDO_INCREMENT_TOO_SMALL, + /* incorrect checksum */ + VDO_CHECKSUM_MISMATCH, + /* the recovery journal is full */ + VDO_RECOVERY_JOURNAL_FULL, + /* a lock is held incorrectly */ + VDO_LOCK_ERROR, + /* the VDO is in read-only mode */ + VDO_READ_ONLY, + /* the VDO is shutting down */ + VDO_SHUTTING_DOWN, + /* the recovery journal has corrupt entries */ + VDO_CORRUPT_JOURNAL, + /* exceeds maximum number of slabs supported */ + VDO_TOO_MANY_SLABS, + /* a compressed block fragment is invalid */ + VDO_INVALID_FRAGMENT, + /* action is unsupported while rebuilding */ + VDO_RETRY_AFTER_REBUILD, + /* the extended command is not known */ + VDO_UNKNOWN_COMMAND, + /* bad extended command parameters */ + VDO_COMMAND_ERROR, + /* cannot determine sizes to fit */ + VDO_CANNOT_DETERMINE_SIZE, + /* a block map entry is invalid */ + VDO_BAD_MAPPING, + /* read cache has no free slots */ + VDO_READ_CACHE_BUSY, + /* bio_add_page failed */ + VDO_BIO_CREATION_FAILED, + /* bad magic number */ + VDO_BAD_MAGIC, + /* bad nonce */ + VDO_BAD_NONCE, + /* sequence number overflow */ + VDO_JOURNAL_OVERFLOW, + /* the VDO is not in a state to perform an admin operation */ + VDO_INVALID_ADMIN_STATE, + /* failure adding a sysfs node */ + VDO_CANT_ADD_SYSFS_NODE, + /* one more than last error code */ + VDO_STATUS_CODE_LAST, + VDO_STATUS_CODE_BLOCK_END = VDO_BLOCK_END +}; + +extern const struct error_info vdo_status_list[]; + +int vdo_register_status_codes(void); + +int vdo_map_to_system_error(int error); + +#endif /* VDO_STATUS_CODES_H */ diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h new file mode 100644 index 000000000000..abc6d36f6522 --- /dev/null +++ b/drivers/md/dm-vdo/types.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2023 Red Hat + */ + +#ifndef VDO_TYPES_H +#define VDO_TYPES_H + +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/device-mapper.h> +#include <linux/list.h> +#include <linux/compiler_attributes.h> +#include <linux/types.h> + +#include "funnel-queue.h" + +/* A size type in blocks. */ +typedef u64 block_count_t; + +/* The size of a block. */ +typedef u16 block_size_t; + +/* A counter for data_vios */ +typedef u16 data_vio_count_t; + +/* A height within a tree. */ +typedef u8 height_t; + +/* The logical block number as used by the consumer. */ +typedef u64 logical_block_number_t; + +/* The type of the nonce used to identify instances of VDO. */ +typedef u64 nonce_t; + +/* A size in pages. */ +typedef u32 page_count_t; + +/* A page number. */ +typedef u32 page_number_t; + +/* + * The physical (well, less logical) block number at which the block is found on the underlying + * device. + */ +typedef u64 physical_block_number_t; + +/* A count of tree roots. */ +typedef u8 root_count_t; + +/* A number of sectors. */ +typedef u8 sector_count_t; + +/* A sequence number. */ +typedef u64 sequence_number_t; + +/* The offset of a block within a slab. */ +typedef u32 slab_block_number; + +/* A size type in slabs. */ +typedef u16 slab_count_t; + +/* A slot in a bin or block map page. */ +typedef u16 slot_number_t; + +/* typedef thread_count_t - A thread counter. */ +typedef u8 thread_count_t; + +/* typedef thread_id_t - A thread ID, vdo threads are numbered sequentially from 0. */ +typedef u8 thread_id_t; + +/* A zone counter */ +typedef u8 zone_count_t; + +/* The following enums are persisted on storage, so the values must be preserved. */ + +/* The current operating mode of the VDO. */ +enum vdo_state { + VDO_DIRTY = 0, + VDO_NEW = 1, + VDO_CLEAN = 2, + VDO_READ_ONLY_MODE = 3, + VDO_FORCE_REBUILD = 4, + VDO_RECOVERING = 5, + VDO_REPLAYING = 6, /* VDO_REPLAYING is never set anymore, but retained for upgrade */ + VDO_REBUILD_FOR_UPGRADE = 7, + + /* Keep VDO_STATE_COUNT at the bottom. */ + VDO_STATE_COUNT +}; + +/** + * vdo_state_requires_read_only_rebuild() - Check whether a vdo_state indicates + * that a read-only rebuild is required. + * @state: The vdo_state to check. + * + * Return: true if the state indicates a rebuild is required + */ +static inline bool __must_check vdo_state_requires_read_only_rebuild(enum vdo_state state) +{ + return ((state == VDO_FORCE_REBUILD) || (state == VDO_REBUILD_FOR_UPGRADE)); +} + +/** + * vdo_state_requires_recovery() - Check whether a vdo state indicates that recovery is needed. + * @state: The state to check. + * + * Return: true if the state indicates a recovery is required + */ +static inline bool __must_check vdo_state_requires_recovery(enum vdo_state state) +{ + return ((state == VDO_DIRTY) || (state == VDO_REPLAYING) || (state == VDO_RECOVERING)); +} + +/* + * The current operation on a physical block (from the point of view of the recovery journal, slab + * journals, and reference counts. + */ +enum journal_operation { + VDO_JOURNAL_DATA_REMAPPING = 0, + VDO_JOURNAL_BLOCK_MAP_REMAPPING = 1, +} __packed; + +/* Partition IDs encoded in the volume layout in the super block. */ +enum partition_id { + VDO_BLOCK_MAP_PARTITION = 0, + VDO_SLAB_DEPOT_PARTITION = 1, + VDO_RECOVERY_JOURNAL_PARTITION = 2, + VDO_SLAB_SUMMARY_PARTITION = 3, +} __packed; + +/* Metadata types for the vdo. */ +enum vdo_metadata_type { + VDO_METADATA_RECOVERY_JOURNAL = 1, + VDO_METADATA_SLAB_JOURNAL = 2, + VDO_METADATA_RECOVERY_JOURNAL_2 = 3, +} __packed; + +/* A position in the block map where a block map entry is stored. */ +struct block_map_slot { + physical_block_number_t pbn; + slot_number_t slot; +}; + +/* + * Four bits of each five-byte block map entry contain a mapping state value used to distinguish + * unmapped or trimmed logical blocks (which are treated as mapped to the zero block) from entries + * that have been mapped to a physical block, including the zero block. + * + * FIXME: these should maybe be defines. + */ +enum block_mapping_state { + VDO_MAPPING_STATE_UNMAPPED = 0, /* Must be zero to be the default value */ + VDO_MAPPING_STATE_UNCOMPRESSED = 1, /* A normal (uncompressed) block */ + VDO_MAPPING_STATE_COMPRESSED_BASE = 2, /* Compressed in slot 0 */ + VDO_MAPPING_STATE_COMPRESSED_MAX = 15, /* Compressed in slot 13 */ +}; + +enum { + VDO_MAX_COMPRESSION_SLOTS = + (VDO_MAPPING_STATE_COMPRESSED_MAX - VDO_MAPPING_STATE_COMPRESSED_BASE + 1), +}; + + +struct data_location { + physical_block_number_t pbn; + enum block_mapping_state state; +}; + +/* The configuration of a single slab derived from the configured block size and slab size. */ +struct slab_config { + /* total number of blocks in the slab */ + block_count_t slab_blocks; + /* number of blocks available for data */ + block_count_t data_blocks; + /* number of blocks for reference counts */ + block_count_t reference_count_blocks; + /* number of blocks for the slab journal */ + block_count_t slab_journal_blocks; + /* + * Number of blocks after which the slab journal starts pushing out a reference_block for + * each new entry it receives. + */ + block_count_t slab_journal_flushing_threshold; + /* + * Number of blocks after which the slab journal pushes out all reference_blocks and makes + * all vios wait. + */ + block_count_t slab_journal_blocking_threshold; + /* Number of blocks after which the slab must be scrubbed before coming online. */ + block_count_t slab_journal_scrubbing_threshold; +} __packed; + +/* + * This structure is memcmp'd for equality. Keep it packed and don't add any fields that are not + * properly set in both extant and parsed configs. + */ +struct thread_count_config { + unsigned int bio_ack_threads; + unsigned int bio_threads; + unsigned int bio_rotation_interval; + unsigned int cpu_threads; + unsigned int logical_zones; + unsigned int physical_zones; + unsigned int hash_zones; +} __packed; + +struct device_config { + struct dm_target *owning_target; + struct dm_dev *owned_device; + struct vdo *vdo; + /* All configs referencing a layer are kept on a list in the layer */ + struct list_head config_list; + char *original_string; + unsigned int version; + char *parent_device_name; + block_count_t physical_blocks; + /* + * This is the number of logical blocks from VDO's internal point of view. It is the number + * of 4K blocks regardless of the value of the logical_block_size parameter below. + */ + block_count_t logical_blocks; + unsigned int logical_block_size; + unsigned int cache_size; + unsigned int block_map_maximum_age; + bool deduplication; + bool compression; + struct thread_count_config thread_counts; + block_count_t max_discard_blocks; +}; + +enum vdo_completion_type { + /* Keep VDO_UNSET_COMPLETION_TYPE at the top. */ + VDO_UNSET_COMPLETION_TYPE, + VDO_ACTION_COMPLETION, + VDO_ADMIN_COMPLETION, + VDO_BLOCK_ALLOCATOR_COMPLETION, + VDO_DATA_VIO_POOL_COMPLETION, + VDO_DECREMENT_COMPLETION, + VDO_FLUSH_COMPLETION, + VDO_FLUSH_NOTIFICATION_COMPLETION, + VDO_GENERATION_FLUSHED_COMPLETION, + VDO_HASH_ZONE_COMPLETION, + VDO_HASH_ZONES_COMPLETION, + VDO_LOCK_COUNTER_COMPLETION, + VDO_PAGE_COMPLETION, + VDO_READ_ONLY_MODE_COMPLETION, + VDO_REPAIR_COMPLETION, + VDO_SYNC_COMPLETION, + VIO_COMPLETION, +} __packed; + +struct vdo_completion; + +/** + * typedef vdo_action_fn - An asynchronous VDO operation. + * @completion: The completion of the operation. + */ +typedef void (*vdo_action_fn)(struct vdo_completion *completion); + +enum vdo_completion_priority { + BIO_ACK_Q_ACK_PRIORITY = 0, + BIO_ACK_Q_MAX_PRIORITY = 0, + BIO_Q_COMPRESSED_DATA_PRIORITY = 0, + BIO_Q_DATA_PRIORITY = 0, + BIO_Q_FLUSH_PRIORITY = 2, + BIO_Q_HIGH_PRIORITY = 2, + BIO_Q_METADATA_PRIORITY = 1, + BIO_Q_VERIFY_PRIORITY = 1, + BIO_Q_MAX_PRIORITY = 2, + CPU_Q_COMPLETE_VIO_PRIORITY = 0, + CPU_Q_COMPLETE_READ_PRIORITY = 0, + CPU_Q_COMPRESS_BLOCK_PRIORITY = 0, + CPU_Q_EVENT_REPORTER_PRIORITY = 0, + CPU_Q_HASH_BLOCK_PRIORITY = 0, + CPU_Q_MAX_PRIORITY = 0, + UDS_Q_PRIORITY = 0, + UDS_Q_MAX_PRIORITY = 0, + VDO_DEFAULT_Q_COMPLETION_PRIORITY = 1, + VDO_DEFAULT_Q_FLUSH_PRIORITY = 2, + VDO_DEFAULT_Q_MAP_BIO_PRIORITY = 0, + VDO_DEFAULT_Q_SYNC_PRIORITY = 2, + VDO_DEFAULT_Q_VIO_CALLBACK_PRIORITY = 1, + VDO_DEFAULT_Q_MAX_PRIORITY = 2, + /* The maximum allowable priority */ + VDO_WORK_Q_MAX_PRIORITY = 2, + /* A value which must be out of range for a valid priority */ + VDO_WORK_Q_DEFAULT_PRIORITY = VDO_WORK_Q_MAX_PRIORITY + 1, +}; + +struct vdo_completion { + /* The type of completion this is */ + enum vdo_completion_type type; + + /* + * <code>true</code> once the processing of the operation is complete. This flag should not + * be used by waiters external to the VDO base as it is used to gate calling the callback. + */ + bool complete; + + /* + * If true, queue this completion on the next callback invocation, even if it is already + * running on the correct thread. + */ + bool requeue; + + /* The ID of the thread which should run the next callback */ + thread_id_t callback_thread_id; + + /* The result of the operation */ + int result; + + /* The VDO on which this completion operates */ + struct vdo *vdo; + + /* The callback which will be called once the operation is complete */ + vdo_action_fn callback; + + /* Callback which, if set, will be called if an error result is set */ + vdo_action_fn error_handler; + + /* The parent object, if any, that spawned this completion */ + void *parent; + + /* Entry link for lock-free work queue */ + struct funnel_queue_entry work_queue_entry_link; + enum vdo_completion_priority priority; + struct vdo_work_queue *my_queue; +}; + +struct block_allocator; +struct data_vio; +struct vdo; +struct vdo_config; + +/* vio types for statistics and instrumentation. */ +enum vio_type { + VIO_TYPE_UNINITIALIZED = 0, + VIO_TYPE_DATA, + VIO_TYPE_BLOCK_ALLOCATOR, + VIO_TYPE_BLOCK_MAP, + VIO_TYPE_BLOCK_MAP_INTERIOR, + VIO_TYPE_GEOMETRY, + VIO_TYPE_PARTITION_COPY, + VIO_TYPE_RECOVERY_JOURNAL, + VIO_TYPE_SLAB_JOURNAL, + VIO_TYPE_SLAB_SUMMARY, + VIO_TYPE_SUPER_BLOCK, +} __packed; + +/* Priority levels for asynchronous I/O operations performed on a vio. */ +enum vio_priority { + VIO_PRIORITY_LOW = 0, + VIO_PRIORITY_DATA = VIO_PRIORITY_LOW, + VIO_PRIORITY_COMPRESSED_DATA = VIO_PRIORITY_DATA, + VIO_PRIORITY_METADATA, + VIO_PRIORITY_HIGH, +} __packed; + +/* + * A wrapper for a bio. All I/O to the storage below a vdo is conducted via vios. + */ +struct vio { + /* The completion for this vio */ + struct vdo_completion completion; + + /* The bio zone in which I/O should be processed */ + zone_count_t bio_zone; + + /* The queueing priority of the vio operation */ + enum vio_priority priority; + + /* The vio type is used for statistics and instrumentation. */ + enum vio_type type; + + /* The size of this vio in blocks */ + unsigned int block_count; + + /* The data being read or written. */ + char *data; + + /* The VDO-owned bio to use for all IO for this vio */ + struct bio *bio; + + /* + * A list of enqueued bios with consecutive block numbers, stored by vdo_submit_bio() under + * the first-enqueued vio. The other vios are found via their bio entries in this list, and + * are not added to the work queue as separate completions. + */ + struct bio_list bios_merged; +}; + +#endif /* VDO_TYPES_H */ diff --git a/drivers/md/dm-vdo/wait-queue.c b/drivers/md/dm-vdo/wait-queue.c new file mode 100644 index 000000000000..8acc24e79d2b --- /dev/null +++ b/drivers/md/dm-vdo/wait-queue.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Red Hat + */ + +#include "wait-queue.h" + +#include <linux/device-mapper.h> + +#include "permassert.h" + +#include "status-codes.h" + +/** + * vdo_enqueue_waiter() - Add a waiter to the tail end of a wait queue. + * @queue: The queue to which to add the waiter. + * @waiter: The waiter to add to the queue. + * + * The waiter must not already be waiting in a queue. + * + * Return: VDO_SUCCESS or an error code. + */ +void vdo_enqueue_waiter(struct wait_queue *queue, struct waiter *waiter) +{ + BUG_ON(waiter->next_waiter != NULL); + + if (queue->last_waiter == NULL) { + /* + * The queue is empty, so form the initial circular list by self-linking the + * initial waiter. + */ + waiter->next_waiter = waiter; + } else { + /* Splice the new waiter in at the end of the queue. */ + waiter->next_waiter = queue->last_waiter->next_waiter; + queue->last_waiter->next_waiter = waiter; + } + + /* In both cases, the waiter we added to the ring becomes the last waiter. */ + queue->last_waiter = waiter; + queue->queue_length += 1; +} + +/** + * vdo_transfer_all_waiters() - Transfer all waiters from one wait queue to a second queue, + * emptying the first queue. + * @from_queue: The queue containing the waiters to move. + * @to_queue: The queue that will receive the waiters from the first queue. + */ +void vdo_transfer_all_waiters(struct wait_queue *from_queue, struct wait_queue *to_queue) +{ + /* If the source queue is empty, there's nothing to do. */ + if (!vdo_has_waiters(from_queue)) + return; + + if (vdo_has_waiters(to_queue)) { + /* + * Both queues are non-empty. Splice the two circular lists together by swapping + * the next (head) pointers in the list tails. + */ + struct waiter *from_head = from_queue->last_waiter->next_waiter; + struct waiter *to_head = to_queue->last_waiter->next_waiter; + + to_queue->last_waiter->next_waiter = from_head; + from_queue->last_waiter->next_waiter = to_head; + } + + to_queue->last_waiter = from_queue->last_waiter; + to_queue->queue_length += from_queue->queue_length; + vdo_initialize_wait_queue(from_queue); +} + +/** + * vdo_notify_all_waiters() - Notify all the entries waiting in a queue. + * @queue: The wait queue containing the waiters to notify. + * @callback: The function to call to notify each waiter, or NULL to invoke the callback field + * registered in each waiter. + * @context: The context to pass to the callback function. + * + * Notifies all the entries waiting in a queue to continue execution by invoking a callback + * function on each of them in turn. The queue is copied and emptied before invoking any callbacks, + * and only the waiters that were in the queue at the start of the call will be notified. + */ +void vdo_notify_all_waiters(struct wait_queue *queue, waiter_callback_fn callback, + void *context) +{ + /* + * Copy and empty the queue first, avoiding the possibility of an infinite loop if entries + * are returned to the queue by the callback function. + */ + struct wait_queue waiters; + + vdo_initialize_wait_queue(&waiters); + vdo_transfer_all_waiters(queue, &waiters); + + /* Drain the copied queue, invoking the callback on every entry. */ + while (vdo_has_waiters(&waiters)) + vdo_notify_next_waiter(&waiters, callback, context); +} + +/** + * vdo_get_first_waiter() - Return the waiter that is at the head end of a wait queue. + * @queue: The queue from which to get the first waiter. + * + * Return: The first (oldest) waiter in the queue, or NULL if the queue is empty. + */ +struct waiter *vdo_get_first_waiter(const struct wait_queue *queue) +{ + struct waiter *last_waiter = queue->last_waiter; + + if (last_waiter == NULL) { + /* There are no waiters, so we're done. */ + return NULL; + } + + /* The queue is circular, so the last entry links to the head of the queue. */ + return last_waiter->next_waiter; +} + +/** + * vdo_dequeue_matching_waiters() - Remove all waiters that match based on the specified matching + * method and append them to a wait_queue. + * @queue: The wait queue to process. + * @match_method: The method to determine matching. + * @match_context: Contextual info for the match method. + * @matched_queue: A wait_queue to store matches. + */ +void vdo_dequeue_matching_waiters(struct wait_queue *queue, waiter_match_fn match_method, + void *match_context, struct wait_queue *matched_queue) +{ + struct wait_queue matched_waiters, iteration_queue; + + vdo_initialize_wait_queue(&matched_waiters); + + vdo_initialize_wait_queue(&iteration_queue); + vdo_transfer_all_waiters(queue, &iteration_queue); + while (vdo_has_waiters(&iteration_queue)) { + struct waiter *waiter = vdo_dequeue_next_waiter(&iteration_queue); + + vdo_enqueue_waiter((match_method(waiter, match_context) ? + &matched_waiters : queue), waiter); + } + + vdo_transfer_all_waiters(&matched_waiters, matched_queue); +} + +/** + * vdo_dequeue_next_waiter() - Remove the first waiter from the head end of a wait queue. + * @queue: The wait queue from which to remove the first entry. + * + * The caller will be responsible for waking the waiter by invoking the correct callback function + * to resume its execution. + * + * Return: The first (oldest) waiter in the queue, or NULL if the queue is empty. + */ +struct waiter *vdo_dequeue_next_waiter(struct wait_queue *queue) +{ + struct waiter *first_waiter = vdo_get_first_waiter(queue); + struct waiter *last_waiter = queue->last_waiter; + + if (first_waiter == NULL) + return NULL; + + if (first_waiter == last_waiter) { + /* The queue has a single entry, so just empty it out by nulling the tail. */ + queue->last_waiter = NULL; + } else { + /* + * The queue has more than one entry, so splice the first waiter out of the + * circular queue. + */ + last_waiter->next_waiter = first_waiter->next_waiter; + } + + /* The waiter is no longer in a wait queue. */ + first_waiter->next_waiter = NULL; + queue->queue_length -= 1; + + return first_waiter; +} + +/** + * vdo_notify_next_waiter() - Notify the next entry waiting in a queue. + * @queue: The wait queue containing the waiter to notify. + * @callback: The function to call to notify the waiter, or NULL to invoke the callback field + * registered in the waiter. + * @context: The context to pass to the callback function. + * + * Notifies the next entry waiting in a queue to continue execution by invoking a callback function + * on it after removing it from the queue. + * + * Return: true if there was a waiter in the queue. + */ +bool vdo_notify_next_waiter(struct wait_queue *queue, waiter_callback_fn callback, + void *context) +{ + struct waiter *waiter = vdo_dequeue_next_waiter(queue); + + if (waiter == NULL) + return false; + + if (callback == NULL) + callback = waiter->callback; + (*callback)(waiter, context); + + return true; +} + +/** + * vdo_get_next_waiter() - Get the waiter after this one, for debug iteration. + * @queue: The wait queue. + * @waiter: A waiter. + * + * Return: The next waiter, or NULL. + */ +const struct waiter *vdo_get_next_waiter(const struct wait_queue *queue, + const struct waiter *waiter) +{ + struct waiter *first_waiter = vdo_get_first_waiter(queue); + + if (waiter == NULL) + return first_waiter; + + return ((waiter->next_waiter != first_waiter) ? waiter->next_waiter : NULL); +} diff --git a/drivers/md/dm-vdo/wait-queue.h b/drivers/md/dm-vdo/wait-queue.h new file mode 100644 index 000000000000..50f1e2a1ea67 --- /dev/null +++ b/drivers/md/dm-vdo/wait-queue.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2023 Red Hat + */ + +#ifndef VDO_WAIT_QUEUE_H +#define VDO_WAIT_QUEUE_H + +#include <linux/compiler.h> +#include <linux/types.h> + +/** + * DOC: Wait queues. + * + * A wait queue is a circular list of entries waiting to be notified of a change in a condition. + * Keeping a circular list allows the queue structure to simply be a pointer to the tail (newest) + * entry in the queue, supporting constant-time enqueue and dequeue operations. A null pointer is + * an empty queue. + * + * An empty queue: + * queue0.last_waiter -> NULL + * + * A singleton queue: + * queue1.last_waiter -> entry1 -> entry1 -> [...] + * + * A three-element queue: + * queue2.last_waiter -> entry3 -> entry1 -> entry2 -> entry3 -> [...] + */ + +struct waiter; + +struct wait_queue { + /* The tail of the queue, the last (most recently added) entry */ + struct waiter *last_waiter; + /* The number of waiters currently in the queue */ + size_t queue_length; +}; + +/** + * typedef waiter_callback_fn - Callback type for functions which will be called to resume + * processing of a waiter after it has been removed from its wait + * queue. + */ +typedef void (*waiter_callback_fn)(struct waiter *waiter, void *context); + +/** + * typedef waiter_match_fn - Method type for waiter matching methods. + * + * A waiter_match_fn method returns false if the waiter does not match. + */ +typedef bool (*waiter_match_fn)(struct waiter *waiter, void *context); + +/* The queue entry structure for entries in a wait_queue. */ +struct waiter { + /* + * The next waiter in the queue. If this entry is the last waiter, then this is actually a + * pointer back to the head of the queue. + */ + struct waiter *next_waiter; + + /* Optional waiter-specific callback to invoke when waking this waiter. */ + waiter_callback_fn callback; +}; + +/** + * is_waiting() - Check whether a waiter is waiting. + * @waiter: The waiter to check. + * + * Return: true if the waiter is on some wait_queue. + */ +static inline bool vdo_is_waiting(struct waiter *waiter) +{ + return (waiter->next_waiter != NULL); +} + +/** + * initialize_wait_queue() - Initialize a wait queue. + * @queue: The queue to initialize. + */ +static inline void vdo_initialize_wait_queue(struct wait_queue *queue) +{ + *queue = (struct wait_queue) { + .last_waiter = NULL, + .queue_length = 0, + }; +} + +/** + * has_waiters() - Check whether a wait queue has any entries waiting in it. + * @queue: The queue to query. + * + * Return: true if there are any waiters in the queue. + */ +static inline bool __must_check vdo_has_waiters(const struct wait_queue *queue) +{ + return (queue->last_waiter != NULL); +} + +void vdo_enqueue_waiter(struct wait_queue *queue, struct waiter *waiter); + +void vdo_notify_all_waiters(struct wait_queue *queue, waiter_callback_fn callback, + void *context); + +bool vdo_notify_next_waiter(struct wait_queue *queue, waiter_callback_fn callback, + void *context); + +void vdo_transfer_all_waiters(struct wait_queue *from_queue, + struct wait_queue *to_queue); + +struct waiter *vdo_get_first_waiter(const struct wait_queue *queue); + +void vdo_dequeue_matching_waiters(struct wait_queue *queue, waiter_match_fn match_method, + void *match_context, struct wait_queue *matched_queue); + +struct waiter *vdo_dequeue_next_waiter(struct wait_queue *queue); + +/** + * count_waiters() - Count the number of waiters in a wait queue. + * @queue: The wait queue to query. + * + * Return: The number of waiters in the queue. + */ +static inline size_t __must_check vdo_count_waiters(const struct wait_queue *queue) +{ + return queue->queue_length; +} + +const struct waiter * __must_check vdo_get_next_waiter(const struct wait_queue *queue, + const struct waiter *waiter); + +#endif /* VDO_WAIT_QUEUE_H */ |