AAPT2: Fix styled string whitespace processing Change styled string whitespace processing to be like AAPT's was. Main changes: - whitespace around tags is preserved. - tags start exactly where they are supposed to, not off by one. Bug: 72406283 Test: make aapt2_tests Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef

commit: 2eed52ecc0c2fa3e96530e4b5556eaa82f7c2dfc [log] [tgz]
author: Adam Lesinski <adamlesinski@google.com> Wed Feb 21 15:55:58 2018 -0800
committer: Adam Lesinski <adamlesinski@google.com> Tue Feb 27 11:39:10 2018 -0800
tree: 4e0a49770f684a2ca823d958c0f1a2b3adabcab9
parent: e1094a2e232277a719025aa5c97c492502c34f5b [diff] [blame]
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index e42145d..d1c9ca1 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp

@@ -76,6 +76,34 @@
   return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
 }
 
+StringPiece TrimLeadingWhitespace(const StringPiece& str) {
+  if (str.size() == 0 || str.data() == nullptr) {
+    return str;
+  }
+
+  const char* start = str.data();
+  const char* end = start + str.length();
+
+  while (start != end && isspace(*start)) {
+    start++;
+  }
+  return StringPiece(start, end - start);
+}
+
+StringPiece TrimTrailingWhitespace(const StringPiece& str) {
+  if (str.size() == 0 || str.data() == nullptr) {
+    return str;
+  }
+
+  const char* start = str.data();
+  const char* end = start + str.length();
+
+  while (end != start && isspace(*(end - 1))) {
+    end--;
+  }
+  return StringPiece(start, end - start);
+}
+
 StringPiece TrimWhitespace(const StringPiece& str) {
   if (str.size() == 0 || str.data() == nullptr) {
     return str;
@@ -269,162 +297,6 @@
   return true;
 }
 
-static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
-  ssize_t len = utf32_to_utf8_length(&codepoint, 1);
-  if (len < 0) {
-    return false;
-  }
-
-  const size_t start_append_pos = output->size();
-
-  // Make room for the next character.
-  output->resize(output->size() + len);
-
-  char* dst = &*(output->begin() + start_append_pos);
-  utf32_to_utf8(&codepoint, 1, dst, len + 1);
-  return true;
-}
-
-static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) {
-  char32_t code = 0;
-  for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
-    char32_t codepoint = iter->Next();
-    char32_t a;
-    if (codepoint >= U'0' && codepoint <= U'9') {
-      a = codepoint - U'0';
-    } else if (codepoint >= U'a' && codepoint <= U'f') {
-      a = codepoint - U'a' + 10;
-    } else if (codepoint >= U'A' && codepoint <= U'F') {
-      a = codepoint - U'A' + 10;
-    } else {
-      return {};
-    }
-    code = (code << 4) | a;
-  }
-  return AppendCodepointToUtf8String(code, output);
-}
-
-static bool IsCodepointSpace(char32_t codepoint) {
-  if (static_cast<uint32_t>(codepoint) & 0xffffff00u) {
-    return false;
-  }
-  return isspace(static_cast<char>(codepoint));
-}
-
-StringBuilder::StringBuilder(bool preserve_spaces) : preserve_spaces_(preserve_spaces) {
-}
-
-StringBuilder& StringBuilder::Append(const StringPiece& str) {
-  if (!error_.empty()) {
-    return *this;
-  }
-
-  // Where the new data will be appended to.
-  const size_t new_data_index = str_.size();
-
-  Utf8Iterator iter(str);
-  while (iter.HasNext()) {
-    const char32_t codepoint = iter.Next();
-
-    if (last_char_was_escape_) {
-      switch (codepoint) {
-        case U't':
-          str_ += '\t';
-          break;
-
-        case U'n':
-          str_ += '\n';
-          break;
-
-        case U'#':
-        case U'@':
-        case U'?':
-        case U'"':
-        case U'\'':
-        case U'\\':
-          str_ += static_cast<char>(codepoint);
-          break;
-
-        case U'u':
-          if (!AppendUnicodeCodepoint(&iter, &str_)) {
-            error_ = "invalid unicode escape sequence";
-            return *this;
-          }
-          break;
-
-        default:
-          // Ignore the escape character and just include the codepoint.
-          AppendCodepointToUtf8String(codepoint, &str_);
-          break;
-      }
-      last_char_was_escape_ = false;
-
-    } else if (!preserve_spaces_ && codepoint == U'"') {
-      if (!quote_ && trailing_space_) {
-        // We found an opening quote, and we have trailing space, so we should append that
-        // space now.
-        if (trailing_space_) {
-          // We had trailing whitespace, so replace with a single space.
-          if (!str_.empty()) {
-            str_ += ' ';
-          }
-          trailing_space_ = false;
-        }
-      }
-      quote_ = !quote_;
-
-    } else if (!preserve_spaces_ && codepoint == U'\'' && !quote_) {
-      // This should be escaped.
-      error_ = "unescaped apostrophe";
-      return *this;
-
-    } else if (codepoint == U'\\') {
-      // This is an escape sequence, convert to the real value.
-      if (!quote_ && trailing_space_) {
-        // We had trailing whitespace, so
-        // replace with a single space.
-        if (!str_.empty()) {
-          str_ += ' ';
-        }
-        trailing_space_ = false;
-      }
-      last_char_was_escape_ = true;
-    } else {
-      if (preserve_spaces_ || quote_) {
-        // Quotes mean everything is taken, including whitespace.
-        AppendCodepointToUtf8String(codepoint, &str_);
-      } else {
-        // This is not quoted text, so we will accumulate whitespace and only emit a single
-        // character of whitespace if it is followed by a non-whitespace character.
-        if (IsCodepointSpace(codepoint)) {
-          // We found whitespace.
-          trailing_space_ = true;
-        } else {
-          if (trailing_space_) {
-            // We saw trailing space before, so replace all
-            // that trailing space with one space.
-            if (!str_.empty()) {
-              str_ += ' ';
-            }
-            trailing_space_ = false;
-          }
-          AppendCodepointToUtf8String(codepoint, &str_);
-        }
-      }
-    }
-  }
-
-  // Accumulate the added string's UTF-16 length.
-  ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
-                                     str_.size() - new_data_index);
-  if (len < 0) {
-    error_ = "invalid unicode code point";
-    return *this;
-  }
-  utf16_len_ += len;
-  return *this;
-}
-
 std::u16string Utf8ToUtf16(const StringPiece& utf8) {
   ssize_t utf16_length = utf8_to_utf16_length(
       reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
commit	2eed52ecc0c2fa3e96530e4b5556eaa82f7c2dfc	[log] [tgz]
author	Adam Lesinski <adamlesinski@google.com>	Wed Feb 21 15:55:58 2018 -0800
committer	Adam Lesinski <adamlesinski@google.com>	Tue Feb 27 11:39:10 2018 -0800
tree	4e0a49770f684a2ca823d958c0f1a2b3adabcab9
parent	e1094a2e232277a719025aa5c97c492502c34f5b [diff] [blame]