diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2011-11-17 16:42:19 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-11-18 10:51:01 -0800 |
commit | 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd (patch) | |
tree | 6895ca20afb24b38f0246b370cba7dc6e72764cb /fs/nls | |
parent | b7463c71fbbff7111d0c879d2f64fe2b08f51848 (diff) | |
download | linux-0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd.tar.gz linux-0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd.tar.bz2 linux-0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd.zip |
NLS: improve UTF8 -> UTF16 string conversion routine
The utf8s_to_utf16s conversion routine needs to be improved. Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.
This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called. A
follow-on patch will add a third caller that does utilize the new
capabilities.
The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations. But that's a subject for a
different patch.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/nls')
-rw-r--r-- | fs/nls/nls_base.c | 43 |
1 files changed, 33 insertions, 10 deletions
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c8..0eb059ec6f28 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) } EXPORT_SYMBOL(utf32_to_utf8); -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, + wchar_t *pwcs, int maxlen) { u16 *op; int size; unicode_t u; op = pwcs; - while (*s && len > 0) { + while (len > 0 && maxlen > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); if (size < 0) return -EINVAL; + s += size; + len -= size; if (u >= PLANE_SIZE) { + if (maxlen < 2) + break; u -= PLANE_SIZE; - *op++ = (wchar_t) (SURROGATE_PAIR | - ((u >> 10) & SURROGATE_BITS)); - *op++ = (wchar_t) (SURROGATE_PAIR | + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | - (u & SURROGATE_BITS)); + (u & SURROGATE_BITS), + endian); + maxlen -= 2; } else { - *op++ = (wchar_t) u; + put_utf16(op++, u, endian); + maxlen--; } - s += size; - len -= size; } else { - *op++ = *s++; + put_utf16(op++, *s++, endian); len--; + maxlen--; } } return op - pwcs; |