/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>

#include <androidfw/LocaleData.h>
#include <androidfw/LocaleDataLookup.h>

28namespace android {
29
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080030const uint32_t PACKED_ROOT = 0; // to represent the root locale
Victor Changa490e2c2024-12-27 14:04:08 +000031const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth();
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080032
33uint32_t findParent(uint32_t packed_locale, const char* script) {
34 if (hasRegion(packed_locale)) {
Victor Changa490e2c2024-12-27 14:04:08 +000035 auto parent_key = findParentLocalePackedKey(script, packed_locale);
36 if (parent_key != 0) {
37 return parent_key;
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080038 }
39 return dropRegion(packed_locale);
40 }
41 return PACKED_ROOT;
42}
43
44// Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
45// space). If any of the members of stop_list was seen, write it in the
46// output but stop afterwards.
47//
48// This also outputs the index of the last written ancestor in the stop_list
49// to stop_list_index, which will be -1 if it is not found in the stop_list.
50//
51// Returns the number of ancestors written in the output, which is always
52// at least one.
Roozbeh Pournader27953c32016-02-01 13:49:52 -080053//
54// (If 'out' is nullptr, we do everything the same way but we simply don't write
55// any results in 'out'.)
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080056size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
57 uint32_t packed_locale, const char* script,
58 const uint32_t* stop_list, size_t stop_set_length) {
59 uint32_t ancestor = packed_locale;
60 size_t count = 0;
61 do {
Roozbeh Pournader27953c32016-02-01 13:49:52 -080062 if (out != nullptr) out[count] = ancestor;
63 count++;
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080064 for (size_t i = 0; i < stop_set_length; i++) {
65 if (stop_list[i] == ancestor) {
66 *stop_list_index = (ssize_t) i;
67 return count;
68 }
69 }
70 ancestor = findParent(ancestor, script);
71 } while (ancestor != PACKED_ROOT);
72 *stop_list_index = (ssize_t) -1;
73 return count;
74}
75
76size_t findDistance(uint32_t supported,
77 const char* script,
78 const uint32_t* request_ancestors,
79 size_t request_ancestors_count) {
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080080 ssize_t request_ancestors_index;
81 const size_t supported_ancestor_count = findAncestors(
Roozbeh Pournader27953c32016-02-01 13:49:52 -080082 nullptr, &request_ancestors_index,
Roozbeh Pournaderb927c552016-01-15 11:23:42 -080083 supported, script,
84 request_ancestors, request_ancestors_count);
85 // Since both locales share the same root, there will always be a shared
86 // ancestor, so the distance in the parent tree is the sum of the distance
87 // of 'supported' to the lowest common ancestor (number of ancestors
88 // written for 'supported' minus 1) plus the distance of 'request' to the
89 // lowest common ancestor (the index of the ancestor in request_ancestors).
90 return supported_ancestor_count + request_ancestors_index - 1;
91}
92
// Packed language+region keys for the Spanish locales that receive special
// treatment in localeDataCompareRegions().
constexpr uint32_t US_SPANISH = 0x65735553LU;              // es-US
constexpr uint32_t MEXICAN_SPANISH = 0x65734D58LU;         // es-MX
constexpr uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU;  // es-419

// The two locales es-US and es-MX are treated as special fallbacks for es-419.
// If there is no es-419, they are considered its equivalent.
//
// Returns true iff 'language_and_region' is the packed key of es-US or es-MX.
inline bool isSpecialSpanish(uint32_t language_and_region) {
    return language_and_region == US_SPANISH || language_and_region == MEXICAN_SPANISH;
}
102
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800103int localeDataCompareRegions(
104 const char* left_region, const char* right_region,
105 const char* requested_language, const char* requested_script,
106 const char* requested_region) {
107
108 if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
109 return 0;
110 }
Roozbeh Pournadera192a8c2017-01-10 15:24:32 -0800111 uint32_t left = packLocale(requested_language, left_region);
112 uint32_t right = packLocale(requested_language, right_region);
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800113 const uint32_t request = packLocale(requested_language, requested_region);
114
Roozbeh Pournadera192a8c2017-01-10 15:24:32 -0800115 // If one and only one of the two locales is a special Spanish locale, we
116 // replace it with es-419. We don't do the replacement if the other locale
117 // is already es-419, or both locales are special Spanish locales (when
118 // es-US is being compared to es-MX).
119 const bool leftIsSpecialSpanish = isSpecialSpanish(left);
120 const bool rightIsSpecialSpanish = isSpecialSpanish(right);
121 if (leftIsSpecialSpanish && !rightIsSpecialSpanish && right != LATIN_AMERICAN_SPANISH) {
122 left = LATIN_AMERICAN_SPANISH;
123 } else if (rightIsSpecialSpanish && !leftIsSpecialSpanish && left != LATIN_AMERICAN_SPANISH) {
124 right = LATIN_AMERICAN_SPANISH;
125 }
126
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800127 uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
128 ssize_t left_right_index;
129 // Find the parents of the request, but stop as soon as we saw left or right
Roozbeh Pournadera192a8c2017-01-10 15:24:32 -0800130 const std::array<uint32_t, 2> left_and_right = {{left, right}};
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800131 const size_t ancestor_count = findAncestors(
132 request_ancestors, &left_right_index,
133 request, requested_script,
Roozbeh Pournadera192a8c2017-01-10 15:24:32 -0800134 left_and_right.data(), left_and_right.size());
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800135 if (left_right_index == 0) { // We saw left earlier
136 return 1;
137 }
138 if (left_right_index == 1) { // We saw right earlier
139 return -1;
140 }
141
142 // If we are here, neither left nor right are an ancestor of the
143 // request. This means that all the ancestors have been computed and
144 // the last ancestor is just the language by itself. We will use the
145 // distance in the parent tree for determining the better match.
146 const size_t left_distance = findDistance(
147 left, requested_script, request_ancestors, ancestor_count);
148 const size_t right_distance = findDistance(
149 right, requested_script, request_ancestors, ancestor_count);
150 if (left_distance != right_distance) {
151 return (int) right_distance - (int) left_distance; // smaller distance is better
152 }
153
154 // If we are here, left and right are equidistant from the request. We will
155 // try and see if any of them is a representative locale.
Victor Changa490e2c2024-12-27 14:04:08 +0000156 const bool left_is_representative = isLocaleRepresentative(left, requested_script);
157 const bool right_is_representative = isLocaleRepresentative(right, requested_script);
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800158 if (left_is_representative != right_is_representative) {
159 return (int) left_is_representative - (int) right_is_representative;
160 }
161
162 // We have no way of figuring out which locale is a better match. For
163 // the sake of stability, we consider the locale with the lower region
164 // code (in dictionary order) better, with two-letter codes before
165 // three-digit codes (since two-letter codes are more specific).
166 return (int64_t) right - (int64_t) left;
167}
168
169void localeDataComputeScript(char out[4], const char* language, const char* region) {
170 if (language[0] == '\0') {
171 memset(out, '\0', SCRIPT_LENGTH);
172 return;
173 }
174 uint32_t lookup_key = packLocale(language, region);
Victor Changa490e2c2024-12-27 14:04:08 +0000175 auto lookup_result = lookupLikelyScript(lookup_key);
176 if (lookup_result == nullptr) {
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800177 // We couldn't find the locale. Let's try without the region
178 if (region[0] != '\0') {
179 lookup_key = dropRegion(lookup_key);
Victor Changa490e2c2024-12-27 14:04:08 +0000180 lookup_result = lookupLikelyScript(lookup_key);
181 if (lookup_result != nullptr) {
182 memcpy(out, lookup_result, SCRIPT_LENGTH);
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800183 return;
184 }
185 }
186 // We don't know anything about the locale
187 memset(out, '\0', SCRIPT_LENGTH);
188 return;
189 } else {
190 // We found the locale.
Victor Changa490e2c2024-12-27 14:04:08 +0000191 memcpy(out, lookup_result, SCRIPT_LENGTH);
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800192 }
193}
194
Roozbeh Pournader27953c32016-02-01 13:49:52 -0800195const uint32_t ENGLISH_STOP_LIST[2] = {
Chih-Hung Hsieh054dab12018-12-10 13:52:46 -0800196 0x656E0000LU, // en
197 0x656E8400LU, // en-001
Roozbeh Pournader27953c32016-02-01 13:49:52 -0800198};
199const char ENGLISH_CHARS[2] = {'e', 'n'};
200const char LATIN_CHARS[4] = {'L', 'a', 't', 'n'};
201
202bool localeDataIsCloseToUsEnglish(const char* region) {
203 const uint32_t locale = packLocale(ENGLISH_CHARS, region);
204 ssize_t stop_list_index;
205 findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2);
206 // A locale is like US English if we see "en" before "en-001" in its ancestor list.
207 return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST
208}
209
Roozbeh Pournaderb927c552016-01-15 11:23:42 -0800210} // namespace android