diff options
Diffstat (limited to 'fs/unicode/utf8-norm.c')
-rw-r--r-- | fs/unicode/utf8-norm.c | 262 |
1 files changed, 32 insertions, 230 deletions
diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c index 1d2d2e5b906a..768f8ab448b8 100644 --- a/fs/unicode/utf8-norm.c +++ b/fs/unicode/utf8-norm.c @@ -6,34 +6,17 @@ #include "utf8n.h" -struct utf8data { - unsigned int maxage; - unsigned int offset; -}; - -#define __INCLUDED_FROM_UTF8NORM_C__ -#include "utf8data.h" -#undef __INCLUDED_FROM_UTF8NORM_C__ - -int utf8version_is_supported(u8 maj, u8 min, u8 rev) +int utf8version_is_supported(const struct unicode_map *um, unsigned int version) { - int i = ARRAY_SIZE(utf8agetab) - 1; - unsigned int sb_utf8version = UNICODE_AGE(maj, min, rev); + int i = um->tables->utf8agetab_size - 1; - while (i >= 0 && utf8agetab[i] != 0) { - if (sb_utf8version == utf8agetab[i]) + while (i >= 0 && um->tables->utf8agetab[i] != 0) { + if (version == um->tables->utf8agetab[i]) return 1; i--; } return 0; } -EXPORT_SYMBOL(utf8version_is_supported); - -int utf8version_latest(void) -{ - return utf8vers; -} -EXPORT_SYMBOL(utf8version_latest); /* * UTF-8 valid ranges. @@ -168,7 +151,7 @@ typedef const unsigned char utf8trie_t; * underlying datatype: unsigned char. * * leaf[0]: The unicode version, stored as a generation number that is - * an index into utf8agetab[]. With this we can filter code + * an index into ->utf8agetab[]. With this we can filter code * points based on the unicode version in which they were * defined. The CCC of a non-defined code point is 0. * leaf[1]: Canonical Combining Class. During normalization, we need @@ -316,21 +299,19 @@ utf8hangul(const char *str, unsigned char *hangul) * is well-formed and corresponds to a known unicode code point. The * shorthand for this will be "is valid UTF-8 unicode". */ -static utf8leaf_t *utf8nlookup(const struct utf8data *data, - unsigned char *hangul, const char *s, size_t len) +static utf8leaf_t *utf8nlookup(const struct unicode_map *um, + enum utf8_normalization n, unsigned char *hangul, const char *s, + size_t len) { - utf8trie_t *trie = NULL; + utf8trie_t *trie = um->tables->utf8data + um->ntab[n]->offset; int offlen; int offset; int mask; int node; - if (!data) - return NULL; if (len == 0) return NULL; - trie = utf8data + data->offset; node = 1; while (node) { offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT; @@ -392,172 +373,29 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data, * * Forwards to utf8nlookup(). */ -static utf8leaf_t *utf8lookup(const struct utf8data *data, - unsigned char *hangul, const char *s) +static utf8leaf_t *utf8lookup(const struct unicode_map *um, + enum utf8_normalization n, unsigned char *hangul, const char *s) { - return utf8nlookup(data, hangul, s, (size_t)-1); -} - -/* - * Maximum age of any character in s. - * Return -1 if s is not valid UTF-8 unicode. - * Return 0 if only non-assigned code points are used. - */ -int utf8agemax(const struct utf8data *data, const char *s) -{ - utf8leaf_t *leaf; - int age = 0; - int leaf_age; - unsigned char hangul[UTF8HANGULLEAF]; - - if (!data) - return -1; - - while (*s) { - leaf = utf8lookup(data, hangul, s); - if (!leaf) - return -1; - - leaf_age = utf8agetab[LEAF_GEN(leaf)]; - if (leaf_age <= data->maxage && leaf_age > age) - age = leaf_age; - s += utf8clen(s); - } - return age; + return utf8nlookup(um, n, hangul, s, (size_t)-1); } -EXPORT_SYMBOL(utf8agemax); - -/* - * Minimum age of any character in s. - * Return -1 if s is not valid UTF-8 unicode. - * Return 0 if non-assigned code points are used. - */ -int utf8agemin(const struct utf8data *data, const char *s) -{ - utf8leaf_t *leaf; - int age; - int leaf_age; - unsigned char hangul[UTF8HANGULLEAF]; - - if (!data) - return -1; - age = data->maxage; - while (*s) { - leaf = utf8lookup(data, hangul, s); - if (!leaf) - return -1; - leaf_age = utf8agetab[LEAF_GEN(leaf)]; - if (leaf_age <= data->maxage && leaf_age < age) - age = leaf_age; - s += utf8clen(s); - } - return age; -} -EXPORT_SYMBOL(utf8agemin); - -/* - * Maximum age of any character in s, touch at most len bytes. - * Return -1 if s is not valid UTF-8 unicode. - */ -int utf8nagemax(const struct utf8data *data, const char *s, size_t len) -{ - utf8leaf_t *leaf; - int age = 0; - int leaf_age; - unsigned char hangul[UTF8HANGULLEAF]; - - if (!data) - return -1; - - while (len && *s) { - leaf = utf8nlookup(data, hangul, s, len); - if (!leaf) - return -1; - leaf_age = utf8agetab[LEAF_GEN(leaf)]; - if (leaf_age <= data->maxage && leaf_age > age) - age = leaf_age; - len -= utf8clen(s); - s += utf8clen(s); - } - return age; -} -EXPORT_SYMBOL(utf8nagemax); - -/* - * Maximum age of any character in s, touch at most len bytes. - * Return -1 if s is not valid UTF-8 unicode. - */ -int utf8nagemin(const struct utf8data *data, const char *s, size_t len) -{ - utf8leaf_t *leaf; - int leaf_age; - int age; - unsigned char hangul[UTF8HANGULLEAF]; - - if (!data) - return -1; - age = data->maxage; - while (len && *s) { - leaf = utf8nlookup(data, hangul, s, len); - if (!leaf) - return -1; - leaf_age = utf8agetab[LEAF_GEN(leaf)]; - if (leaf_age <= data->maxage && leaf_age < age) - age = leaf_age; - len -= utf8clen(s); - s += utf8clen(s); - } - return age; -} -EXPORT_SYMBOL(utf8nagemin); - -/* - * Length of the normalization of s. - * Return -1 if s is not valid UTF-8 unicode. - * - * A string of Default_Ignorable_Code_Point has length 0. - */ -ssize_t utf8len(const struct utf8data *data, const char *s) -{ - utf8leaf_t *leaf; - size_t ret = 0; - unsigned char hangul[UTF8HANGULLEAF]; - - if (!data) - return -1; - while (*s) { - leaf = utf8lookup(data, hangul, s); - if (!leaf) - return -1; - if (utf8agetab[LEAF_GEN(leaf)] > data->maxage) - ret += utf8clen(s); - else if (LEAF_CCC(leaf) == DECOMPOSE) - ret += strlen(LEAF_STR(leaf)); - else - ret += utf8clen(s); - s += utf8clen(s); - } - return ret; -} -EXPORT_SYMBOL(utf8len); /* * Length of the normalization of s, touch at most len bytes. * Return -1 if s is not valid UTF-8 unicode. */ -ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len) +ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, + const char *s, size_t len) { utf8leaf_t *leaf; size_t ret = 0; unsigned char hangul[UTF8HANGULLEAF]; - if (!data) - return -1; while (len && *s) { - leaf = utf8nlookup(data, hangul, s, len); + leaf = utf8nlookup(um, n, hangul, s, len); if (!leaf) return -1; - if (utf8agetab[LEAF_GEN(leaf)] > data->maxage) + if (um->tables->utf8agetab[LEAF_GEN(leaf)] > + um->ntab[n]->maxage) ret += utf8clen(s); else if (LEAF_CCC(leaf) == DECOMPOSE) ret += strlen(LEAF_STR(leaf)); @@ -568,7 +406,6 @@ ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len) } return ret; } -EXPORT_SYMBOL(utf8nlen); /* * Set up an utf8cursor for use by utf8byte(). @@ -580,14 +417,13 @@ EXPORT_SYMBOL(utf8nlen); * * Returns -1 on error, 0 on success. */ -int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, - const char *s, size_t len) +int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, + enum utf8_normalization n, const char *s, size_t len) { - if (!data) - return -1; if (!s) return -1; - u8c->data = data; + u8c->um = um; + u8c->n = n; u8c->s = s; u8c->p = NULL; u8c->ss = NULL; @@ -604,23 +440,6 @@ int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, return -1; return 0; } -EXPORT_SYMBOL(utf8ncursor); - -/* - * Set up an utf8cursor for use by utf8byte(). - * - * u8c : pointer to cursor. - * data : const struct utf8data to use for normalization. - * s : NUL-terminated string. - * - * Returns -1 on error, 0 on success. - */ -int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data, - const char *s) -{ - return utf8ncursor(u8c, data, s, (unsigned int)-1); -} -EXPORT_SYMBOL(utf8cursor); /* * Get one byte from the normalized form of the string described by u8c. @@ -678,9 +497,9 @@ int utf8byte(struct utf8cursor *u8c) /* Look up the data for the current character. */ if (u8c->p) { - leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); + leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); } else { - leaf = utf8nlookup(u8c->data, u8c->hangul, + leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul, u8c->s, u8c->len); } @@ -690,7 +509,8 @@ int utf8byte(struct utf8cursor *u8c) ccc = LEAF_CCC(leaf); /* Characters that are too new have CCC 0. */ - if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) { + if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] > + u8c->um->ntab[u8c->n]->maxage) { ccc = STOPPER; } else if (ccc == DECOMPOSE) { u8c->len -= utf8clen(u8c->s); @@ -704,7 +524,7 @@ int utf8byte(struct utf8cursor *u8c) goto ccc_mismatch; } - leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); + leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); if (!leaf) return -1; ccc = LEAF_CCC(leaf); @@ -765,28 +585,10 @@ ccc_mismatch: } } } -EXPORT_SYMBOL(utf8byte); - -const struct utf8data *utf8nfdi(unsigned int maxage) -{ - int i = ARRAY_SIZE(utf8nfdidata) - 1; - - while (maxage < utf8nfdidata[i].maxage) - i--; - if (maxage > utf8nfdidata[i].maxage) - return NULL; - return &utf8nfdidata[i]; -} -EXPORT_SYMBOL(utf8nfdi); - -const struct utf8data *utf8nfdicf(unsigned int maxage) -{ - int i = ARRAY_SIZE(utf8nfdicfdata) - 1; - while (maxage < utf8nfdicfdata[i].maxage) - i--; - if (maxage > utf8nfdicfdata[i].maxage) - return NULL; - return &utf8nfdicfdata[i]; -} -EXPORT_SYMBOL(utf8nfdicf); +#ifdef CONFIG_UNICODE_NORMALIZATION_SELFTEST_MODULE +EXPORT_SYMBOL_GPL(utf8version_is_supported); +EXPORT_SYMBOL_GPL(utf8nlen); +EXPORT_SYMBOL_GPL(utf8ncursor); +EXPORT_SYMBOL_GPL(utf8byte); +#endif |