Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef LATINIME_CHAR_UTILS_H |
| 18 | #define LATINIME_CHAR_UTILS_H |
| 19 | |
| 20 | namespace latinime { |
| 21 | |
/**
 * Tests whether c is an ASCII uppercase letter.
 *
 * @param c the character code to test
 * @return non-zero when c lies in 'A'..'Z' (inclusive), zero otherwise
 */
inline static int isAsciiUpper(unsigned short c) {
    // Reject everything below 'A' or above 'Z'; what remains is the uppercase range.
    return !(c < 'A' || c > 'Z');
}
| 25 | |
/**
 * Converts an ASCII uppercase letter to the corresponding lowercase letter.
 *
 * @param c the character code to convert
 * @return the matching letter in 'a'..'z' when c is in 'A'..'Z';
 *         otherwise c itself, unchanged
 */
inline static unsigned short toAsciiLower(unsigned short c) {
    // Applying the 'A' -> 'a' offset to anything outside 'A'..'Z' would yield
    // an unrelated code (or wrap around in unsigned short), so pass such
    // values through untouched.
    if (c < 'A' || c > 'Z') {
        return c;
    }
    return (unsigned short)(c - 'A' + 'a');
}
| 29 | |
/**
 * Tests whether c falls inside the 7-bit ASCII range.
 *
 * @param c the character code to test
 * @return non-zero when c is in 0..127 (inclusive), zero otherwise
 */
inline static int isAscii(unsigned short c) {
    // 0x80 is the first code past the 7-bit ASCII range.
    return c < 0x80;
}
| 33 | |
/**
 * Declared here only; the definition is not part of this header.
 * toBaseLowerCase() falls back to this function for characters outside the
 * ASCII range.
 * NOTE(review): presumably returns the lowercase form of c -- confirm against
 * the definition.
 */
unsigned short latin_tolower(unsigned short c);
| 35 | |
/**
 * Lookup table that maps most combined Latin, Greek, and Cyrillic characters
 * to their base characters. For a character code c that is in range,
 * BASE_CHARS[c] is the base character when c is a combined character, and c
 * itself when it is not.
 *
 * BASE_CHARS_SIZE is the number of entries in the table, so valid indices are
 * 0 .. BASE_CHARS_SIZE - 1. The table is only declared here (extern).
 */
static const int BASE_CHARS_SIZE = 0x0500;
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
| 45 | |
| 46 | inline static unsigned short toBaseChar(unsigned short c) { |
| 47 | if (c < BASE_CHARS_SIZE) { |
| 48 | return BASE_CHARS[c]; |
| 49 | } |
| 50 | return c; |
| 51 | } |
| 52 | |
| 53 | inline static unsigned short toBaseLowerCase(unsigned short c) { |
| 54 | c = toBaseChar(c); |
| 55 | if (isAsciiUpper(c)) { |
| 56 | return toAsciiLower(c); |
| 57 | } else if (isAscii(c)) { |
| 58 | return c; |
| 59 | } |
| 60 | return latin_tolower(c); |
| 61 | } |
| 62 | |
Ken Wakasa | ce9e52a | 2011-06-18 13:09:55 +0900 | [diff] [blame] | 63 | } // namespace latinime |
Ken Wakasa | 07cab72 | 2010-04-20 01:24:57 +0900 | [diff] [blame] | 64 | |
| 65 | #endif // LATINIME_CHAR_UTILS_H |