/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
Roozbeh Pournader | a192a8c | 2017-01-10 15:24:32 -0800 | [diff] [blame] | 17 | #include <array> |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 18 | #include <cstdint> |
| 19 | #include <cstdlib> |
| 20 | #include <cstring> |
| 21 | #include <string> |
| 22 | #include <unordered_map> |
| 23 | #include <unordered_set> |
| 24 | |
| 25 | #include <androidfw/LocaleData.h> |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 26 | #include <androidfw/LocaleDataLookup.h> |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 27 | |
| 28 | namespace android { |
| 29 | |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 30 | const uint32_t PACKED_ROOT = 0; // to represent the root locale |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 31 | const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth(); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 32 | |
| 33 | uint32_t findParent(uint32_t packed_locale, const char* script) { |
| 34 | if (hasRegion(packed_locale)) { |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 35 | auto parent_key = findParentLocalePackedKey(script, packed_locale); |
| 36 | if (parent_key != 0) { |
| 37 | return parent_key; |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 38 | } |
| 39 | return dropRegion(packed_locale); |
| 40 | } |
| 41 | return PACKED_ROOT; |
| 42 | } |
| 43 | |
| 44 | // Find the ancestors of a locale, and fill 'out' with it (assumes out has enough |
| 45 | // space). If any of the members of stop_list was seen, write it in the |
| 46 | // output but stop afterwards. |
| 47 | // |
| 48 | // This also outputs the index of the last written ancestor in the stop_list |
| 49 | // to stop_list_index, which will be -1 if it is not found in the stop_list. |
| 50 | // |
| 51 | // Returns the number of ancestors written in the output, which is always |
| 52 | // at least one. |
Roozbeh Pournader | 27953c3 | 2016-02-01 13:49:52 -0800 | [diff] [blame] | 53 | // |
| 54 | // (If 'out' is nullptr, we do everything the same way but we simply don't write |
| 55 | // any results in 'out'.) |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 56 | size_t findAncestors(uint32_t* out, ssize_t* stop_list_index, |
| 57 | uint32_t packed_locale, const char* script, |
| 58 | const uint32_t* stop_list, size_t stop_set_length) { |
| 59 | uint32_t ancestor = packed_locale; |
| 60 | size_t count = 0; |
| 61 | do { |
Roozbeh Pournader | 27953c3 | 2016-02-01 13:49:52 -0800 | [diff] [blame] | 62 | if (out != nullptr) out[count] = ancestor; |
| 63 | count++; |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 64 | for (size_t i = 0; i < stop_set_length; i++) { |
| 65 | if (stop_list[i] == ancestor) { |
| 66 | *stop_list_index = (ssize_t) i; |
| 67 | return count; |
| 68 | } |
| 69 | } |
| 70 | ancestor = findParent(ancestor, script); |
| 71 | } while (ancestor != PACKED_ROOT); |
| 72 | *stop_list_index = (ssize_t) -1; |
| 73 | return count; |
| 74 | } |
| 75 | |
| 76 | size_t findDistance(uint32_t supported, |
| 77 | const char* script, |
| 78 | const uint32_t* request_ancestors, |
| 79 | size_t request_ancestors_count) { |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 80 | ssize_t request_ancestors_index; |
| 81 | const size_t supported_ancestor_count = findAncestors( |
Roozbeh Pournader | 27953c3 | 2016-02-01 13:49:52 -0800 | [diff] [blame] | 82 | nullptr, &request_ancestors_index, |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 83 | supported, script, |
| 84 | request_ancestors, request_ancestors_count); |
| 85 | // Since both locales share the same root, there will always be a shared |
| 86 | // ancestor, so the distance in the parent tree is the sum of the distance |
| 87 | // of 'supported' to the lowest common ancestor (number of ancestors |
| 88 | // written for 'supported' minus 1) plus the distance of 'request' to the |
| 89 | // lowest common ancestor (the index of the ancestor in request_ancestors). |
| 90 | return supported_ancestor_count + request_ancestors_index - 1; |
| 91 | } |
| 92 | |
// Packed language-and-region keys for the Spanish locales that get special
// treatment when regions are compared.
constexpr uint32_t US_SPANISH = 0x65735553U;              // es-US
constexpr uint32_t MEXICAN_SPANISH = 0x65734D58U;         // es-MX
constexpr uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424U;  // es-419

// es-US and es-MX act as special fallbacks for es-419: when es-419 itself is
// not available, either of them is considered its equivalent.
// Returns true iff 'language_and_region' is exactly es-US or es-MX.
inline bool isSpecialSpanish(uint32_t language_and_region) {
    switch (language_and_region) {
        case US_SPANISH:
        case MEXICAN_SPANISH:
            return true;
        default:
            return false;
    }
}
| 102 | |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 103 | int localeDataCompareRegions( |
| 104 | const char* left_region, const char* right_region, |
| 105 | const char* requested_language, const char* requested_script, |
| 106 | const char* requested_region) { |
| 107 | |
| 108 | if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) { |
| 109 | return 0; |
| 110 | } |
Roozbeh Pournader | a192a8c | 2017-01-10 15:24:32 -0800 | [diff] [blame] | 111 | uint32_t left = packLocale(requested_language, left_region); |
| 112 | uint32_t right = packLocale(requested_language, right_region); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 113 | const uint32_t request = packLocale(requested_language, requested_region); |
| 114 | |
Roozbeh Pournader | a192a8c | 2017-01-10 15:24:32 -0800 | [diff] [blame] | 115 | // If one and only one of the two locales is a special Spanish locale, we |
| 116 | // replace it with es-419. We don't do the replacement if the other locale |
| 117 | // is already es-419, or both locales are special Spanish locales (when |
| 118 | // es-US is being compared to es-MX). |
| 119 | const bool leftIsSpecialSpanish = isSpecialSpanish(left); |
| 120 | const bool rightIsSpecialSpanish = isSpecialSpanish(right); |
| 121 | if (leftIsSpecialSpanish && !rightIsSpecialSpanish && right != LATIN_AMERICAN_SPANISH) { |
| 122 | left = LATIN_AMERICAN_SPANISH; |
| 123 | } else if (rightIsSpecialSpanish && !leftIsSpecialSpanish && left != LATIN_AMERICAN_SPANISH) { |
| 124 | right = LATIN_AMERICAN_SPANISH; |
| 125 | } |
| 126 | |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 127 | uint32_t request_ancestors[MAX_PARENT_DEPTH+1]; |
| 128 | ssize_t left_right_index; |
| 129 | // Find the parents of the request, but stop as soon as we saw left or right |
Roozbeh Pournader | a192a8c | 2017-01-10 15:24:32 -0800 | [diff] [blame] | 130 | const std::array<uint32_t, 2> left_and_right = {{left, right}}; |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 131 | const size_t ancestor_count = findAncestors( |
| 132 | request_ancestors, &left_right_index, |
| 133 | request, requested_script, |
Roozbeh Pournader | a192a8c | 2017-01-10 15:24:32 -0800 | [diff] [blame] | 134 | left_and_right.data(), left_and_right.size()); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 135 | if (left_right_index == 0) { // We saw left earlier |
| 136 | return 1; |
| 137 | } |
| 138 | if (left_right_index == 1) { // We saw right earlier |
| 139 | return -1; |
| 140 | } |
| 141 | |
| 142 | // If we are here, neither left nor right are an ancestor of the |
| 143 | // request. This means that all the ancestors have been computed and |
| 144 | // the last ancestor is just the language by itself. We will use the |
| 145 | // distance in the parent tree for determining the better match. |
| 146 | const size_t left_distance = findDistance( |
| 147 | left, requested_script, request_ancestors, ancestor_count); |
| 148 | const size_t right_distance = findDistance( |
| 149 | right, requested_script, request_ancestors, ancestor_count); |
| 150 | if (left_distance != right_distance) { |
| 151 | return (int) right_distance - (int) left_distance; // smaller distance is better |
| 152 | } |
| 153 | |
| 154 | // If we are here, left and right are equidistant from the request. We will |
| 155 | // try and see if any of them is a representative locale. |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 156 | const bool left_is_representative = isLocaleRepresentative(left, requested_script); |
| 157 | const bool right_is_representative = isLocaleRepresentative(right, requested_script); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 158 | if (left_is_representative != right_is_representative) { |
| 159 | return (int) left_is_representative - (int) right_is_representative; |
| 160 | } |
| 161 | |
| 162 | // We have no way of figuring out which locale is a better match. For |
| 163 | // the sake of stability, we consider the locale with the lower region |
| 164 | // code (in dictionary order) better, with two-letter codes before |
| 165 | // three-digit codes (since two-letter codes are more specific). |
| 166 | return (int64_t) right - (int64_t) left; |
| 167 | } |
| 168 | |
| 169 | void localeDataComputeScript(char out[4], const char* language, const char* region) { |
| 170 | if (language[0] == '\0') { |
| 171 | memset(out, '\0', SCRIPT_LENGTH); |
| 172 | return; |
| 173 | } |
| 174 | uint32_t lookup_key = packLocale(language, region); |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 175 | auto lookup_result = lookupLikelyScript(lookup_key); |
| 176 | if (lookup_result == nullptr) { |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 177 | // We couldn't find the locale. Let's try without the region |
| 178 | if (region[0] != '\0') { |
| 179 | lookup_key = dropRegion(lookup_key); |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 180 | lookup_result = lookupLikelyScript(lookup_key); |
| 181 | if (lookup_result != nullptr) { |
| 182 | memcpy(out, lookup_result, SCRIPT_LENGTH); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 183 | return; |
| 184 | } |
| 185 | } |
| 186 | // We don't know anything about the locale |
| 187 | memset(out, '\0', SCRIPT_LENGTH); |
| 188 | return; |
| 189 | } else { |
| 190 | // We found the locale. |
Victor Chang | a490e2c | 2024-12-27 14:04:08 +0000 | [diff] [blame] | 191 | memcpy(out, lookup_result, SCRIPT_LENGTH); |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 192 | } |
| 193 | } |
| 194 | |
// Packed locales that end the ancestor walk in localeDataIsCloseToUsEnglish().
// The order matters: index 0 must stay "en".
constexpr uint32_t ENGLISH_STOP_LIST[2] = {
    0x656E0000U,  // en
    0x656E8400U,  // en-001
};

// Language and script codes for English in Latin script. These are raw
// character arrays without a terminating '\0'.
constexpr char ENGLISH_CHARS[2] = {'e', 'n'};
constexpr char LATIN_CHARS[4] = {'L', 'a', 't', 'n'};
| 201 | |
| 202 | bool localeDataIsCloseToUsEnglish(const char* region) { |
| 203 | const uint32_t locale = packLocale(ENGLISH_CHARS, region); |
| 204 | ssize_t stop_list_index; |
| 205 | findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2); |
| 206 | // A locale is like US English if we see "en" before "en-001" in its ancestor list. |
| 207 | return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST |
| 208 | } |
| 209 | |
Roozbeh Pournader | b927c55 | 2016-01-15 11:23:42 -0800 | [diff] [blame] | 210 | } // namespace android |