From 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 25 Apr 2019 11:55:50 -0400 Subject: ext4: fix use-after-free race with debug_want_extra_isize When remounting with debug_want_extra_isize, we were not performing the same checks that we do during a normal mount. That allowed us to set a value for s_want_extra_isize that reached outside the s_inode_size. Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions") Reported-by: syzbot+f584efa0ac7213c226b7@syzkaller.appspotmail.com Reviewed-by: Jan Kara Signed-off-by: Barret Rhoden Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 58 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 24 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6ed4eb81e674..184944d4d8d1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3513,6 +3513,37 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } +static void ext4_clamp_want_extra_isize(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && + sbi->s_want_extra_isize == 0) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (ext4_has_feature_extra_isize(sb)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + ext4_msg(sb, KERN_INFO, + "required extra inode space not available"); + } +} + static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; @@ -4387,30 +4418,7 @@ no_journal: } else if (ret) goto failed_mount4a; - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - if (ext4_has_feature_extra_isize(sb)) { - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_want_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_want_extra_isize); - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_min_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_min_extra_isize); - } - } - /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - ext4_msg(sb, KERN_INFO, "required extra inode space not" - "available"); - } + ext4_clamp_want_extra_isize(sb); ext4_set_resv_clusters(sb); @@ -5194,6 +5202,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + ext4_clamp_want_extra_isize(sb); + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "changing journal_checksum " -- cgit v1.2.3 From c83ad55eaa91c8e85dd8cc3b7b3485fac45ef7bf Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 25 Apr 2019 14:05:42 -0400 Subject: ext4: include charset encoding information in the superblock Support for encoding is considered an incompatible feature, since it has potential to create collisions of file names in existing filesystems. If the feature flag is not enabled, the entire filesystem will operate on opaque byte sequences, respecting the original behavior. The s_encoding field stores a magic number indicating the encoding format and version used globally by file and directory names in the filesystem. The s_encoding_flags defines policies for using the charset encoding, like how to handle invalid sequences. The magic number is mapped to the exact charset table, but the mapping is specific to ext4. Since we don't have any commitment to support old encodings, the only encoding I am supporting right now is utf8-12.1.0. The current implementation prevents the user from enabling encoding and per-directory encryption on the same filesystem at the same time. The incompatibility between these features lies in how we do efficient directory searches when we cannot be sure the encryption of the user provided fname will match the actual hash stored in the disk without decrypting every directory entry, because of normalization cases. My quickest solution is to simply block the concurrent use of these features for now, and enable it later, once we have a better solution. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 184944d4d8d1..c1b02c3a5a68 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1054,6 +1055,9 @@ static void ext4_put_super(struct super_block *sb) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); +#ifdef CONFIG_UNICODE + utf8_unload(sbi->s_encoding); +#endif kfree(sbi); } @@ -1750,6 +1754,36 @@ static const struct mount_opts { {Opt_err, 0, 0} }; +#ifdef CONFIG_UNICODE +static const struct ext4_sb_encodings { + __u16 magic; + char *name; + char *version; +} ext4_sb_encoding_map[] = { + {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"}, +}; + +static int ext4_sb_read_encoding(const struct ext4_super_block *es, + const struct ext4_sb_encodings **encoding, + __u16 *flags) +{ + __u16 magic = le16_to_cpu(es->s_encoding); + int i; + + for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++) + if (magic == ext4_sb_encoding_map[i].magic) + break; + + if (i >= ARRAY_SIZE(ext4_sb_encoding_map)) + return -EINVAL; + + *encoding = &ext4_sb_encoding_map[i]; + *flags = le16_to_cpu(es->s_encoding_flags); + + return 0; +} +#endif + static int handle_mount_opt(struct super_block *sb, char *opt, int token, substring_t *args, unsigned long *journal_devnum, unsigned int *journal_ioprio, int is_remount) @@ -2880,6 +2914,15 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } +#ifndef CONFIG_UNICODE + if (ext4_has_feature_casefold(sb)) { + ext4_msg(sb, KERN_ERR, + "Filesystem with casefold feature cannot be " + "mounted without CONFIG_UNICODE"); + return 0; + } +#endif + if (readonly) return 1; @@ -3770,6 +3813,43 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) &journal_ioprio, 0)) goto failed_mount; +#ifdef CONFIG_UNICODE + if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) { + const struct ext4_sb_encodings *encoding_info; + struct unicode_map *encoding; + __u16 encoding_flags; + + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_ERR, + "Can't mount with encoding and encryption"); + goto failed_mount; + } + + if (ext4_sb_read_encoding(es, &encoding_info, + &encoding_flags)) { + ext4_msg(sb, KERN_ERR, + "Encoding requested by superblock is unknown"); + goto failed_mount; + } + + encoding = utf8_load(encoding_info->version); + if (IS_ERR(encoding)) { + ext4_msg(sb, KERN_ERR, + "can't mount with superblock charset: %s-%s " + "not supported by the kernel. flags: 0x%x.", + encoding_info->name, encoding_info->version, + encoding_flags); + goto failed_mount; + } + ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " + "%s-%s with flags 0x%hx", encoding_info->name, + encoding_info->version?:"\b", encoding_flags); + + sbi->s_encoding = encoding; + sbi->s_encoding_flags = encoding_flags; + } +#endif + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " "with data=journal disables delayed " @@ -4586,6 +4666,11 @@ failed_mount2: failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); + +#ifdef CONFIG_UNICODE + utf8_unload(sbi->s_encoding); +#endif + #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); -- cgit v1.2.3 From b886ee3e778ec2ad43e276fd378ab492cf6819b7 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 25 Apr 2019 14:12:08 -0400 Subject: ext4: Support case-insensitive file name lookups This patch implements the actual support for case-insensitive file name lookups in ext4, based on the feature bit and the encoding stored in the superblock. A filesystem that has the casefold feature set is able to configure directories with the +F (EXT4_CASEFOLD_FL) attribute, enabling lookups to succeed in that directory in a case-insensitive fashion, i.e: match a directory entry even if the name used by userspace is not a byte per byte match with the disk name, but is an equivalent case-insensitive version of the Unicode string. This operation is called a case-insensitive file name lookup. The feature is configured as an inode attribute applied to directories and inherited by its children. This attribute can only be enabled on empty directories for filesystems that support the encoding feature, thus preventing collision of file names that only differ by case. * dcache handling: For a +F directory, Ext4 only stores the first equivalent name dentry used in the dcache. This is done to prevent unintentional duplication of dentries in the dcache, while also allowing the VFS code to quickly find the right entry in the cache despite which equivalent string was used in a previous lookup, without having to resort to ->lookup(). d_hash() of casefolded directories is implemented as the hash of the casefolded string, such that we always have a well-known bucket for all the equivalencies of the same string. d_compare() uses the utf8_strncasecmp() infrastructure, which handles the comparison of equivalent, same case, names as well. For now, negative lookups are not inserted in the dcache, since they would need to be invalidated anyway, because we can't trust missing file dentries. This is bad for performance but requires some leveraging of the vfs layer to fix. We can live without that for now, and so does everyone else. * on-disk data: Despite using a specific version of the name as the internal representation within the dcache, the name stored and fetched from the disk is a byte-per-byte match with what the user requested, making this implementation 'name-preserving'. i.e. no actual information is lost when writing to storage. DX is supported by modifying the hashes used in +F directories to make them case/encoding-aware. The new disk hashes are calculated as the hash of the full casefolded string, instead of the string directly. This allows us to efficiently search for file names in the htree without requiring the user to provide an exact name. * Dealing with invalid sequences: By default, when a invalid UTF-8 sequence is identified, ext4 will treat it as an opaque byte sequence, ignoring the encoding and reverting to the old behavior for that unique file. This means that case-insensitive file name lookup will not work only for that file. An optional bit can be set in the superblock telling the filesystem code and userspace tools to enforce the encoding. When that optional bit is set, any attempt to create a file name using an invalid UTF-8 sequence will fail and return an error to userspace. * Normalization algorithm: The UTF-8 algorithms used to compare strings in ext4 is implemented lives in fs/unicode, and is based on a previous version developed by SGI. It implements the Canonical decomposition (NFD) algorithm described by the Unicode specification 12.1, or higher, combined with the elimination of ignorable code points (NFDi) and full case-folding (CF) as documented in fs/unicode/utf8_norm.c. NFD seems to be the best normalization method for EXT4 because: - It has a lower cost than NFC/NFKC (which requires decomposing to NFD as an intermediary step) - It doesn't eliminate important semantic meaning like compatibility decompositions. Although: - This implementation is not completely linguistic accurate, because different languages have conflicting rules, which would require the specialization of the filesystem to a given locale, which brings all sorts of problems for removable media and for users who use more than one language. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c1b02c3a5a68..aeb6d22ea0ad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4484,6 +4484,12 @@ no_journal: iput(root); goto failed_mount4; } + +#ifdef CONFIG_UNICODE + if (sbi->s_encoding) + sb->s_d_op = &ext4_dentry_ops; +#endif + sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); -- cgit v1.2.3 From 50b29d8f033a7c88c5bc011abc2068b1691ab755 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Tue, 30 Apr 2019 23:08:15 -0400 Subject: ext4: fix ext4_show_options for file systems w/o journal Instead of removing EXT4_MOUNT_JOURNAL_CHECKSUM from s_def_mount_opt as I assume was intended, all other options were blown away leading to _ext4_show_options() output being incorrect. Fixes: 1e381f60dad9 ("ext4: do not allow journal_opts for fs w/o journal") Signed-off-by: Debabrata Banerjee Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index aeb6d22ea0ad..fc6fa2c93e77 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4349,7 +4349,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "data=, fs mounted w/o journal"); goto failed_mount_wq; } - sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; -- cgit v1.2.3