/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include "compile/PseudolocaleGenerator.h"

#include <algorithm>
#include <iterator>

#include "ResourceTable.h"
#include "ResourceValues.h"
#include "ValueVisitor.h"
#include "androidfw/Util.h"
#include "compile/Pseudolocalizer.h"
#include "util/Util.h"

using ::android::ConfigDescription;
using ::android::StringPiece;
using ::android::StringPiece16;
Adam Lesinskid5083f62017-01-16 15:07:21 -080031
Adam Lesinski393b5f02015-12-17 13:03:11 -080032namespace aapt {
33
// Common representation for both Span objects and UntranslatableSections, so that the two kinds
// of ranges can be sorted and processed together.
struct UnifiedSpan {
  // The tag name. Only present for Span objects; if absent, this was an UntranslatableSection.
  std::optional<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  uint32_t first_char;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char;
};
45
46inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
47 if (left.first_char < right.first_char) {
48 return true;
49 } else if (left.first_char > right.first_char) {
50 return false;
51 } else if (left.last_char < right.last_char) {
52 return true;
53 }
54 return false;
55}
56
Jeremy Meyer56f36e82022-05-20 20:35:42 +000057inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
Adam Lesinski8049f3d2017-03-31 18:28:14 -070058 return UnifiedSpan{*span.name, span.first_char, span.last_char};
59}
60
61inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
62 return UnifiedSpan{
63 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
64}
65
66// Merges the Span and UntranslatableSections of this StyledString into a single vector of
67// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
68static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
69 // Ensure the Spans are sorted and converted.
70 std::vector<UnifiedSpan> sorted_spans;
71 sorted_spans.reserve(string.value->spans.size());
72 std::transform(string.value->spans.begin(), string.value->spans.end(),
73 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
74
75 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
76 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
77
78 // Ensure the UntranslatableSections are sorted and converted.
79 std::vector<UnifiedSpan> sorted_untranslatable_sections;
80 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
81 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
82 std::back_inserter(sorted_untranslatable_sections),
83 UntranslatableSectionToUnifiedSpan);
84 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
85
86 std::vector<UnifiedSpan> merged_spans;
87 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
88 auto span_iter = sorted_spans.begin();
89 auto untranslatable_iter = sorted_untranslatable_sections.begin();
90 while (span_iter != sorted_spans.end() &&
91 untranslatable_iter != sorted_untranslatable_sections.end()) {
92 if (*span_iter < *untranslatable_iter) {
93 merged_spans.push_back(std::move(*span_iter));
94 ++span_iter;
95 } else {
96 merged_spans.push_back(std::move(*untranslatable_iter));
97 ++untranslatable_iter;
98 }
99 }
100
101 while (span_iter != sorted_spans.end()) {
102 merged_spans.push_back(std::move(*span_iter));
103 ++span_iter;
104 }
105
106 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
107 merged_spans.push_back(std::move(*untranslatable_iter));
108 ++untranslatable_iter;
109 }
110 return merged_spans;
111}
112
113std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
114 Pseudolocalizer::Method method,
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000115 android::StringPool* pool) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700116 Pseudolocalizer localizer(method);
Adam Lesinski393b5f02015-12-17 13:03:11 -0800117
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700118 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
119 // This will effectively subdivide the string into multiple sections that can be individually
120 // pseudolocalized, while keeping the span indices synchronized.
121 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
122
123 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
124 // runtime. So we will do all our processing in UTF-16, then convert back.
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000125 const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700126
127 // Convenient wrapper around the text that allows us to work with StringPieces.
128 const StringPiece16 text(text16);
129
130 // The new string.
131 std::string new_string = localizer.Start();
132
133 // The stack that keeps track of what nested Span we're in.
134 std::vector<size_t> span_stack;
135
136 // The current position in the original text.
137 uint32_t cursor = 0u;
138
139 // The current position in the new text.
140 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
141 new_string.size(), false);
142
143 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
144 bool translatable = true;
145 size_t span_idx = 0u;
146 while (span_idx < merged_spans.size() || !span_stack.empty()) {
147 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
148 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
149
150 if (span != nullptr) {
151 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
152 // There is no parent, or this span is the child of the parent.
153 // Pseudolocalize all the text until this span.
154 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
155 cursor += substr.size();
156
157 // Pseudolocalize the substring.
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000158 std::string new_substr = android::util::Utf16ToUtf8(substr);
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700159 if (translatable) {
160 new_substr = localizer.Text(new_substr);
161 }
162 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
163 new_substr.size(), false);
164 new_string += new_substr;
165
166 // Rewrite the first_char.
167 span->first_char = new_cursor;
168 if (!span->tag) {
169 // An untranslatable section has begun!
170 translatable = false;
171 }
172 span_stack.push_back(span_idx);
173 ++span_idx;
174 continue;
175 }
176 }
177
178 if (parent_span != nullptr) {
179 // There is a parent, and either this span is not a child of it, or there are no more spans.
180 // Pop this off the stack.
181 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
182 cursor += substr.size();
183
184 // Pseudolocalize the substring.
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000185 std::string new_substr = android::util::Utf16ToUtf8(substr);
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700186 if (translatable) {
187 new_substr = localizer.Text(new_substr);
188 }
189 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
190 new_substr.size(), false);
191 new_string += new_substr;
192
193 parent_span->last_char = new_cursor - 1;
194 if (parent_span->tag) {
195 // An end to an untranslatable section.
196 translatable = true;
197 }
198 span_stack.pop_back();
199 }
200 }
201
202 // Finish the pseudolocalization at the end of the string.
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000203 new_string +=
204 localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700205 new_string += localizer.End();
Adam Lesinski393b5f02015-12-17 13:03:11 -0800206
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000207 android::StyleString localized;
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700208 localized.str = std::move(new_string);
Adam Lesinski393b5f02015-12-17 13:03:11 -0800209
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700210 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
211 for (UnifiedSpan& span : merged_spans) {
212 if (span.tag) {
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000213 localized.spans.push_back(
214 android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
Adam Lesinski75421622017-01-06 15:20:04 -0800215 }
216 }
Adam Lesinski75421622017-01-06 15:20:04 -0800217 return util::make_unique<StyledString>(pool->MakeRef(localized));
Adam Lesinski393b5f02015-12-17 13:03:11 -0800218}
219
220namespace {
221
Adam Lesinskid3ffa8442017-09-28 13:34:35 -0700222class Visitor : public ValueVisitor {
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700223 public:
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700224 // Either value or item will be populated upon visiting the value.
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700225 std::unique_ptr<Value> value;
226 std::unique_ptr<Item> item;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800227
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000228 Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
229 : pool_(pool), method_(method), localizer_(method) {
230 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800231
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700232 void Visit(Plural* plural) override {
Ryan Mitchellefcdb952021-04-14 17:31:37 -0700233 CloningValueTransformer cloner(pool_);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700234 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
235 for (size_t i = 0; i < plural->values.size(); i++) {
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700236 Visitor sub_visitor(pool_, method_);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700237 if (plural->values[i]) {
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700238 plural->values[i]->Accept(&sub_visitor);
Donald Chai4c3da0f2019-05-31 23:52:21 -0700239 if (sub_visitor.item) {
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700240 localized->values[i] = std::move(sub_visitor.item);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700241 } else {
Ryan Mitchellefcdb952021-04-14 17:31:37 -0700242 localized->values[i] = plural->values[i]->Transform(cloner);
Adam Lesinski393b5f02015-12-17 13:03:11 -0800243 }
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700244 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800245 }
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700246 localized->SetSource(plural->GetSource());
247 localized->SetWeak(true);
248 value = std::move(localized);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700249 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800250
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700251 void Visit(String* string) override {
Adam Lesinski75421622017-01-06 15:20:04 -0800252 const StringPiece original_string = *string->value;
253 std::string result = localizer_.Start();
254
255 // Pseudolocalize only the translatable sections.
256 size_t start = 0u;
257 for (const UntranslatableSection& section : string->untranslatable_sections) {
258 // Pseudolocalize the content before the untranslatable section.
259 const size_t len = section.start - start;
260 if (len > 0u) {
261 result += localizer_.Text(original_string.substr(start, len));
262 }
263
264 // Copy the untranslatable content.
265 result += original_string.substr(section.start, section.end - section.start);
266 start = section.end;
267 }
268
269 // Pseudolocalize the content after the last untranslatable section.
270 if (start != original_string.size()) {
271 const size_t len = original_string.size() - start;
272 result += localizer_.Text(original_string.substr(start, len));
273 }
274 result += localizer_.End();
275
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700276 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700277 localized->SetSource(string->GetSource());
278 localized->SetWeak(true);
279 item = std::move(localized);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700280 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800281
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700282 void Visit(StyledString* string) override {
283 item = PseudolocalizeStyledString(string, method_, pool_);
Adam Lesinski75421622017-01-06 15:20:04 -0800284 item->SetSource(string->GetSource());
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700285 item->SetWeak(true);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700286 }
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700287
288 private:
289 DISALLOW_COPY_AND_ASSIGN(Visitor);
290
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000291 android::StringPool* pool_;
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700292 Pseudolocalizer::Method method_;
293 Pseudolocalizer localizer_;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800294};
295
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700296ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
Adam Lesinski393b5f02015-12-17 13:03:11 -0800297 Pseudolocalizer::Method m) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700298 ConfigDescription modified = base;
299 switch (m) {
Adam Lesinski393b5f02015-12-17 13:03:11 -0800300 case Pseudolocalizer::Method::kAccent:
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700301 modified.language[0] = 'e';
302 modified.language[1] = 'n';
303 modified.country[0] = 'X';
304 modified.country[1] = 'A';
305 break;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800306
307 case Pseudolocalizer::Method::kBidi:
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700308 modified.language[0] = 'a';
309 modified.language[1] = 'r';
310 modified.country[0] = 'X';
311 modified.country[1] = 'B';
312 break;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800313 default:
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700314 break;
315 }
316 return modified;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800317}
318
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700319void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
Jeremy Meyer56f36e82022-05-20 20:35:42 +0000320 ResourceConfigValue* original_value, android::StringPool* pool,
321 ResourceEntry* entry) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700322 Visitor visitor(pool, method);
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700323 original_value->value->Accept(&visitor);
Adam Lesinski393b5f02015-12-17 13:03:11 -0800324
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700325 std::unique_ptr<Value> localized_value;
326 if (visitor.value) {
327 localized_value = std::move(visitor.value);
328 } else if (visitor.item) {
329 localized_value = std::move(visitor.item);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700330 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800331
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700332 if (!localized_value) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700333 return;
334 }
Adam Lesinskie4bb9eb2016-02-12 22:18:51 -0800335
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700336 ConfigDescription config_with_accent =
337 ModifyConfigForPseudoLocale(original_value->config, method);
Adam Lesinskie4bb9eb2016-02-12 22:18:51 -0800338
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700339 ResourceConfigValue* new_config_value =
340 entry->FindOrCreateValue(config_with_accent, original_value->product);
341 if (!new_config_value->value) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700342 // Only use auto-generated pseudo-localization if none is defined.
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700343 new_config_value->value = std::move(localized_value);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700344 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800345}
346
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700347// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
348// translatable.
Adam Lesinskice5e56e2016-10-21 17:56:45 -0700349static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700350 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700351 if (diff & ConfigDescription::CONFIG_LOCALE) {
352 return false;
353 }
Adam Lesinski75421622017-01-06 15:20:04 -0800354 return config_value->value->IsTranslatable();
Adam Lesinski458b8772016-04-25 14:20:21 -0700355}
356
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700357} // namespace
Adam Lesinski393b5f02015-12-17 13:03:11 -0800358
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700359bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700360 for (auto& package : table->packages) {
361 for (auto& type : package->types) {
362 for (auto& entry : type->entries) {
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700363 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700364 for (ResourceConfigValue* value : values) {
Adam Lesinski8049f3d2017-03-31 18:28:14 -0700365 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
366 entry.get());
367 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
368 entry.get());
Adam Lesinski393b5f02015-12-17 13:03:11 -0800369 }
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700370 }
Adam Lesinski393b5f02015-12-17 13:03:11 -0800371 }
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700372 }
373 return true;
Adam Lesinski393b5f02015-12-17 13:03:11 -0800374}
375
Adam Lesinskicacb28f2016-10-19 12:18:14 -0700376} // namespace aapt