Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef LATINIME_CHAR_UTILS_H |
| 18 | #define LATINIME_CHAR_UTILS_H |
| 19 | |
Ken Wakasa | de8a9a8 | 2012-08-17 13:06:28 +0900 | [diff] [blame] | 20 | #include <cctype> |
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame^] | 21 | #include <stdint.h> |
Ken Wakasa | de8a9a8 | 2012-08-17 13:06:28 +0900 | [diff] [blame] | 22 | |
Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 23 | namespace latinime { |
| 24 | |
/**
 * Returns true if and only if c is an ASCII upper-case letter ('A'..'Z').
 *
 * This is an explicit range comparison rather than a call to isupper():
 * c can be any 16-bit code unit, and the <cctype> classifiers have undefined
 * behavior for arguments that are neither EOF nor representable as
 * unsigned char. The comparison is also independent of the current C locale.
 */
inline static bool isAsciiUpper(unsigned short c) {
    return c >= 'A' && c <= 'Z';
}
| 28 | |
/**
 * Shifts c by the fixed distance between 'A' and 'a'.
 *
 * No range check is performed here; the result is only a lower-case letter
 * when the caller passes a value in 'A'..'Z'.
 */
inline static unsigned short toAsciiLower(unsigned short c) {
    const int caseOffset = 'a' - 'A';
    return static_cast<unsigned short>(c + caseOffset);
}
| 32 | |
/**
 * Returns true if and only if c lies in the 7-bit ASCII range (0x00..0x7F).
 *
 * Written as a plain comparison instead of isascii(), which is a POSIX
 * extension that <cctype> is not required to declare.
 */
inline static bool isAscii(unsigned short c) {
    return c <= 0x7F;
}
| 36 | |
// Declared here and defined in another translation unit. toLowerCase() falls
// back to this function for every code point outside the ASCII range.
// NOTE(review): presumably returns the lower-case form of c, or c itself when
// no mapping exists -- confirm against the definition.
unsigned short latin_tolower(const unsigned short c);
Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 38 | |
/**
 * Table mapping most combined Latin, Greek, and Cyrillic characters
 * to their base characters. For every c below BASE_CHARS_SIZE,
 * BASE_CHARS[c] is the base character when c is a combined character,
 * and c itself otherwise.
 *
 * Only the declaration lives in this header; the table contents are
 * defined in another translation unit.
 */

// Number of entries in BASE_CHARS; code points at or above this value have
// no entry in the table.
static const int BASE_CHARS_SIZE = 0x0500;
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
Tadashi G. Takaoka | 6e3cb27 | 2011-11-11 14:26:13 +0900 | [diff] [blame] | 48 | |
| 49 | inline static unsigned short toBaseChar(unsigned short c) { |
| 50 | if (c < BASE_CHARS_SIZE) { |
| 51 | return BASE_CHARS[c]; |
| 52 | } |
| 53 | return c; |
| 54 | } |
| 55 | |
Jean Chalard | e9a86e2 | 2012-06-28 21:01:29 +0900 | [diff] [blame] | 56 | inline static unsigned short toLowerCase(const unsigned short c) { |
Tadashi G. Takaoka | 6e3cb27 | 2011-11-11 14:26:13 +0900 | [diff] [blame] | 57 | if (isAsciiUpper(c)) { |
| 58 | return toAsciiLower(c); |
| 59 | } else if (isAscii(c)) { |
| 60 | return c; |
| 61 | } |
| 62 | return latin_tolower(c); |
| 63 | } |
| 64 | |
Jean Chalard | e9a86e2 | 2012-06-28 21:01:29 +0900 | [diff] [blame] | 65 | inline static unsigned short toBaseLowerCase(const unsigned short c) { |
| 66 | return toLowerCase(toBaseChar(c)); |
| 67 | } |
Ken Wakasa | ce9e52a | 2011-06-18 13:09:55 +0900 | [diff] [blame] | 68 | } // namespace latinime |
Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 69 | #endif // LATINIME_CHAR_UTILS_H |