diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2009-04-30 10:08:18 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2009-06-15 21:44:43 -0700 |
commit | 74675a58507e769beee7d949dbed788af3c4139d (patch) | |
tree | d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/isofs | |
parent | a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff) | |
download | linux-74675a58507e769beee7d949dbed788af3c4139d.tar.gz linux-74675a58507e769beee7d949dbed788af3c4139d.tar.bz2 linux-74675a58507e769beee7d949dbed788af3c4139d.zip |
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.
The old wchar_t 16-bit type is retained because it's still used in
lots of places. This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code then before it must
have yielded an undefined code.
Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediate after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/isofs')
-rw-r--r-- | fs/isofs/joliet.c | 36 |
1 files changed, 3 insertions, 33 deletions
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 92c14b850e9c..a048de81c093 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) return (op - ascii); } -/* Convert big endian wide character string to utf8 */ -static int -wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen) -{ - const __u8 *ip; - __u8 *op; - int size; - __u16 c; - - op = s; - ip = pwcs; - while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) { - c = (*ip << 8) | ip[1]; - if (c > 0x7f) { - size = utf8_wctomb(op, c, maxlen); - if (size == -1) { - /* Ignore character and move on */ - maxlen--; - } else { - op += size; - maxlen -= size; - } - } else { - *op++ = (__u8) c; - } - ip += 2; - inlen--; - } - return (op - s); -} - int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { @@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; if (utf8) { - len = wcsntombs_be(outname, de->name, - de->name_len[0] >> 1, PAGE_SIZE); + len = utf16s_to_utf8s((const wchar_t *) de->name, + de->name_len[0] >> 1, UTF16_BIG_ENDIAN, + outname, PAGE_SIZE); } else { len = uni16_to_x8(outname, (__be16 *) de->name, de->name_len[0] >> 1, nls); |