summaryrefslogtreecommitdiffstats
path: root/drivers/firmware/efi/libstub/efi-stub-helper.c
diff options
context:
space:
mode:
authorArvind Sankar <nivedita@alum.mit.edu>2020-05-18 15:07:13 -0400
committerArd Biesheuvel <ardb@kernel.org>2020-05-20 19:09:20 +0200
commit4b75bd363de3f3429c62359be4e4aa4da7fa233b (patch)
treefcca2307e27570aa9c7a8140de24dba7f2c479a4 /drivers/firmware/efi/libstub/efi-stub-helper.c
parentd850a2ff918be0691f8d0544a2156c856c42da5b (diff)
downloadlinux-4b75bd363de3f3429c62359be4e4aa4da7fa233b.tar.gz
linux-4b75bd363de3f3429c62359be4e4aa4da7fa233b.tar.bz2
linux-4b75bd363de3f3429c62359be4e4aa4da7fa233b.zip
efi/libstub: Add UTF-8 decoding to efi_puts
In order to be able to use the UTF-16 support added to vsprintf in the previous commit, enhance efi_puts to decode UTF-8 into UTF-16. Invalid UTF-8 encodings are passed through unchanged. Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu> Link: https://lore.kernel.org/r/20200518190716.751506-22-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Diffstat (limited to 'drivers/firmware/efi/libstub/efi-stub-helper.c')
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c67
1 files changed, 62 insertions, 5 deletions
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index f338d149aaa5..0d0007355c1e 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -36,17 +36,74 @@ void efi_char16_puts(efi_char16_t *str)
output_string, str);
}
+static
+u32 utf8_to_utf32(const u8 **s8)
+{
+ u32 c32;
+ u8 c0, cx;
+ size_t clen, i;
+
+ c0 = cx = *(*s8)++;
+ /*
+ * The position of the most-significant 0 bit gives us the length of
+ * a multi-octet encoding.
+ */
+ for (clen = 0; cx & 0x80; ++clen)
+ cx <<= 1;
+ /*
+ * If the 0 bit is in position 8, this is a valid single-octet
+ * encoding. If the 0 bit is in position 7 or positions 1-3, the
+ * encoding is invalid.
+ * In either case, we just return the first octet.
+ */
+ if (clen < 2 || clen > 4)
+ return c0;
+ /* Get the bits from the first octet. */
+ c32 = cx >> clen--;
+ for (i = 0; i < clen; ++i) {
+ /* Trailing octets must have 10 in most significant bits. */
+ cx = (*s8)[i] ^ 0x80;
+ if (cx & 0xc0)
+ return c0;
+ c32 = (c32 << 6) | cx;
+ }
+ /*
+ * Check for validity:
+ * - The character must be in the Unicode range.
+ * - It must not be a surrogate.
+ * - It must be encoded using the correct number of octets.
+ */
+ if (c32 > 0x10ffff ||
+ (c32 & 0xf800) == 0xd800 ||
+ clen != (c32 >= 0x80) + (c32 >= 0x800) + (c32 >= 0x10000))
+ return c0;
+ *s8 += clen;
+ return c32;
+}
+
void efi_puts(const char *str)
{
efi_char16_t buf[128];
size_t pos = 0, lim = ARRAY_SIZE(buf);
+ const u8 *s8 = (const u8 *)str;
+ u32 c32;
- while (*str) {
- if (*str == '\n')
+ while (*s8) {
+ if (*s8 == '\n')
buf[pos++] = L'\r';
- /* Cast to unsigned char to avoid sign-extension */
- buf[pos++] = (unsigned char)(*str++);
- if (*str == '\0' || pos >= lim - 2) {
+ c32 = utf8_to_utf32(&s8);
+ if (c32 < 0x10000) {
+ /* Characters in plane 0 use a single word. */
+ buf[pos++] = c32;
+ } else {
+ /*
+ * Characters in other planes encode into a surrogate
+ * pair.
+ */
+ buf[pos++] = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);
+ buf[pos++] = 0xdc00 + (c32 & 0x3ff);
+ }
+ if (*s8 == '\0' || pos >= lim - 2) {
buf[pos] = L'\0';
efi_char16_puts(buf);
pos = 0;