summaryrefslogtreecommitdiffstats
path: root/fs/nls
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nls')
-rw-r--r--fs/nls/Kconfig8
-rw-r--r--fs/nls/Makefile1
-rw-r--r--fs/nls/nls_ucs2_utils.c146
-rw-r--r--fs/nls/nls_ucs2_utils.h297
4 files changed, 452 insertions, 0 deletions
diff --git a/fs/nls/Kconfig b/fs/nls/Kconfig
index c7857e36adbb..a0d0e2f7ec83 100644
--- a/fs/nls/Kconfig
+++ b/fs/nls/Kconfig
@@ -617,4 +617,12 @@ config NLS_UTF8
input/output character sets. Say Y here for the UTF-8 encoding of
the Unicode/ISO9646 universal character set.
+config NLS_UCS2_UTILS
+ tristate "NLS UCS-2 UTILS"
+ help
+ Set of older UCS-2 conversion utilities and tables used by some
+ filesystems including SMB/CIFS. This includes upper case conversion
+ tables. This will automatically be selected when the filesystem
+ that uses it is selected.
+
endif # NLS
diff --git a/fs/nls/Makefile b/fs/nls/Makefile
index ac54db297128..5062c699d041 100644
--- a/fs/nls/Makefile
+++ b/fs/nls/Makefile
@@ -54,3 +54,4 @@ obj-$(CONFIG_NLS_MAC_INUIT) += mac-inuit.o
obj-$(CONFIG_NLS_MAC_ROMANIAN) += mac-romanian.o
obj-$(CONFIG_NLS_MAC_ROMAN) += mac-roman.o
obj-$(CONFIG_NLS_MAC_TURKISH) += mac-turkish.o
+obj-$(CONFIG_NLS_UCS2_UTILS) += nls_ucs2_utils.o
diff --git a/fs/nls/nls_ucs2_utils.c b/fs/nls/nls_ucs2_utils.c
new file mode 100644
index 000000000000..a69781c54dd8
--- /dev/null
+++ b/fs/nls/nls_ucs2_utils.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Some of the source code in this file came from fs/cifs/uniupr.h
+ * Copyright (c) International Business Machines Corp., 2000,2002
+ *
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ * Modified by Steve French (sfrench@us.ibm.com)
+ * Modified by Namjae Jeon (linkinjeon@kernel.org)
+ *
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+#include "nls_ucs2_utils.h"
+
+MODULE_LICENSE("GPL");
+
+/*
+ * Latin upper case
+ */
+signed char NlsUniUpperTable[512] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* 060-06f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, 0, 0, 0, 0, 0, /* 070-07f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, /* 0e0-0ef */
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32,
+ -32, -32, -32, -32, -32, 121, /* 0f0-0ff */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */
+ 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */
+ 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */
+ 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */
+ 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */
+ -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */
+ 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -79, 0, -1, /* 1d0-1df */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */
+ 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */
+};
+EXPORT_SYMBOL_GPL(NlsUniUpperTable);
+
+/* Upper case range - Greek */
+static signed char UniCaseRangeU03a0[47] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -38, -37, -37, -37, /* 3a0-3af */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 3b0-3bf */
+ -32, -32, -31, -32, -32, -32, -32, -32, -32, -32, -32, -32, -64,
+ -63, -63,
+};
+
+/* Upper case range - Cyrillic */
+static signed char UniCaseRangeU0430[48] = {
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 430-43f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 440-44f */
+ 0, -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, 0, -80, -80, /* 450-45f */
+};
+
+/* Upper case range - Extended cyrillic */
+static signed char UniCaseRangeU0490[61] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */
+ 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1,
+};
+
+/* Upper case range - Extended latin and greek */
+static signed char UniCaseRangeU1e00[509] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */
+ 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, -59, 0, -1, 0, -1, /* 1e90-1e9f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */
+ 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */
+ 74, 74, 86, 86, 86, 86, 100, 100, 0, 0, 112, 112,
+ 126, 126, 0, 0, /* 1f70-1f7f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */
+ 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */
+ 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */
+ 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Upper case range - Wide latin */
+static signed char UniCaseRangeUff40[27] = {
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* ff40-ff4f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+};
+
+/*
+ * Upper Case Range
+ */
+const struct UniCaseRange NlsUniUpperRange[] = {
+ {0x03a0, 0x03ce, UniCaseRangeU03a0},
+ {0x0430, 0x045f, UniCaseRangeU0430},
+ {0x0490, 0x04cc, UniCaseRangeU0490},
+ {0x1e00, 0x1ffc, UniCaseRangeU1e00},
+ {0xff40, 0xff5a, UniCaseRangeUff40},
+ {0}
+};
+EXPORT_SYMBOL_GPL(NlsUniUpperRange);
diff --git a/fs/nls/nls_ucs2_utils.h b/fs/nls/nls_ucs2_utils.h
new file mode 100644
index 000000000000..3500596ea993
--- /dev/null
+++ b/fs/nls/nls_ucs2_utils.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ * and then via server/unicode.c
+ * cifs_unicode: Unicode kernel case support
+ *
+ * Function:
+ * Convert a unicode character to upper or lower case using
+ * compressed tables.
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ *
+ *
+ * Notes:
+ * These APIs are based on the C library functions. The semantics
+ * should match the C functions but with expanded size operands.
+ *
+ * The upper/lower functions are based on a table created by mkupr.
+ * This is a compressed table of upper and lower case conversion.
+ *
+ */
+#ifndef _NLS_UCS2_UTILS_H
+#define _NLS_UCS2_UTILS_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+#include <linux/nls.h>
+#include <linux/unicode.h>
+
+/*
+ * Windows maps these to the user defined 16 bit Unicode range since they are
+ * reserved symbols (along with \ and /), otherwise illegal to store
+ * in filenames in NTFS
+ */
+#define UNI_ASTERISK ((__u16)('*' + 0xF000))
+#define UNI_QUESTION ((__u16)('?' + 0xF000))
+#define UNI_COLON ((__u16)(':' + 0xF000))
+#define UNI_GRTRTHAN ((__u16)('>' + 0xF000))
+#define UNI_LESSTHAN ((__u16)('<' + 0xF000))
+#define UNI_PIPE ((__u16)('|' + 0xF000))
+#define UNI_SLASH ((__u16)('\\' + 0xF000))
+
+#ifndef UNICASERANGE_DEFINED
+struct UniCaseRange {
+ wchar_t start;
+ wchar_t end;
+ signed char *table;
+};
+#endif /* UNICASERANGE_DEFINED */
+
+#ifndef UNIUPR_NOUPPER
+extern signed char NlsUniUpperTable[512];
+extern const struct UniCaseRange NlsUniUpperRange[];
+#endif /* UNIUPR_NOUPPER */
+
+/*
+ * UniStrcat: Concatenate the second string to the first
+ *
+ * Returns:
+ * Address of the first string
+ */
+static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
+{
+ wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */
+
+ while (*ucs1++)
+ /*NULL*/; /* To end of first string */
+ ucs1--; /* Return to the null */
+ while ((*ucs1++ = *ucs2++))
+ /*NULL*/; /* copy string 2 over */
+ return anchor;
+}
+
+/*
+ * UniStrchr: Find a character in a string
+ *
+ * Returns:
+ * Address of first occurrence of character in string
+ * or NULL if the character is not in the string
+ */
+static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
+{
+ while ((*ucs != uc) && *ucs)
+ ucs++;
+
+ if (*ucs == uc)
+ return (wchar_t *)ucs;
+ return NULL;
+}
+
+/*
+ * UniStrcmp: Compare two strings
+ *
+ * Returns:
+ * < 0: First string is less than second
+ * = 0: Strings are equal
+ * > 0: First string is greater than second
+ */
+static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+ while ((*ucs1 == *ucs2) && *ucs1) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrcpy: Copy a string
+ */
+static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
+{
+ wchar_t *anchor = ucs1; /* save the start of result string */
+
+ while ((*ucs1++ = *ucs2++))
+ /*NULL*/;
+ return anchor;
+}
+
+/*
+ * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes)
+ */
+static inline size_t UniStrlen(const wchar_t *ucs1)
+{
+ int i = 0;
+
+ while (*ucs1++)
+ i++;
+ return i;
+}
+
+/*
+ * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a
+ * string (length limited)
+ */
+static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
+{
+ int i = 0;
+
+ while (*ucs1++) {
+ i++;
+ if (i >= maxlen)
+ break;
+ }
+ return i;
+}
+
+/*
+ * UniStrncat: Concatenate length limited string
+ */
+static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1; /* save pointer to string 1 */
+
+ while (*ucs1++)
+ /*NULL*/;
+ ucs1--; /* point to null terminator of s1 */
+ while (n-- && (*ucs1 = *ucs2)) { /* copy s2 after s1 */
+ ucs1++;
+ ucs2++;
+ }
+ *ucs1 = 0; /* Null terminate the result */
+ return anchor;
+}
+
+/*
+ * UniStrncmp: Compare length limited string
+ */
+static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ if (!n)
+ return 0; /* Null strings are equal */
+ while ((*ucs1 == *ucs2) && *ucs1 && --n) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrncmp_le: Compare length limited string - native to little-endian
+ */
+static inline int
+UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ if (!n)
+ return 0; /* Null strings are equal */
+ while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)__le16_to_cpu(*ucs2);
+}
+
+/*
+ * UniStrncpy: Copy length limited string with pad
+ */
+static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1;
+
+ while (n-- && *ucs2) /* Copy the strings */
+ *ucs1++ = *ucs2++;
+
+ n++;
+ while (n--) /* Pad with nulls */
+ *ucs1++ = 0;
+ return anchor;
+}
+
+/*
+ * UniStrncpy_le: Copy length limited string with pad to little-endian
+ */
+static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1;
+
+ while (n-- && *ucs2) /* Copy the strings */
+ *ucs1++ = __le16_to_cpu(*ucs2++);
+
+ n++;
+ while (n--) /* Pad with nulls */
+ *ucs1++ = 0;
+ return anchor;
+}
+
+/*
+ * UniStrstr: Find a string in a string
+ *
+ * Returns:
+ * Address of first match found
+ * NULL if no matching string is found
+ */
+static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+ const wchar_t *anchor1 = ucs1;
+ const wchar_t *anchor2 = ucs2;
+
+ while (*ucs1) {
+ if (*ucs1 == *ucs2) {
+ /* Partial match found */
+ ucs1++;
+ ucs2++;
+ } else {
+ if (!*ucs2) /* Match found */
+ return (wchar_t *)anchor1;
+ ucs1 = ++anchor1; /* No match */
+ ucs2 = anchor2;
+ }
+ }
+
+ if (!*ucs2) /* Both end together */
+ return (wchar_t *)anchor1; /* Match found */
+ return NULL; /* No match */
+}
+
+#ifndef UNIUPR_NOUPPER
+/*
+ * UniToupper: Convert a unicode character to upper case
+ */
+static inline wchar_t UniToupper(register wchar_t uc)
+{
+ register const struct UniCaseRange *rp;
+
+ if (uc < sizeof(NlsUniUpperTable)) {
+ /* Latin characters */
+ return uc + NlsUniUpperTable[uc]; /* Use base tables */
+ }
+
+ rp = NlsUniUpperRange; /* Use range tables */
+ while (rp->start) {
+ if (uc < rp->start) /* Before start of range */
+ return uc; /* Uppercase = input */
+ if (uc <= rp->end) /* In range */
+ return uc + rp->table[uc - rp->start];
+ rp++; /* Try next range */
+ }
+ return uc; /* Past last range */
+}
+
+/*
+ * UniStrupr: Upper case a unicode string
+ */
+static inline __le16 *UniStrupr(register __le16 *upin)
+{
+ register __le16 *up;
+
+ up = upin;
+ while (*up) { /* For all characters */
+ *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
+ up++;
+ }
+ return upin; /* Return input pointer */
+}
+#endif /* UNIUPR_NOUPPER */
+
+#endif /* _NLS_UCS2_UTILS_H */