Add lz4diff patch routines

Test: th
Bug: 206729162

Change-Id: I76a404e233e7683812bac65520c6c065e9a1ffe2
diff --git a/Android.bp b/Android.bp
index 81565e9..ae331d2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -186,11 +186,13 @@
         "libpayload_extent_utils",
         "libcow_operation_convert",
         "lz4diff-protos",
+        "liblz4patch",
     ],
     shared_libs: [
         "libbase",
         "libcrypto",
         "libfec",
+        "liblz4",
         "libziparchive",
     ],
 }
@@ -460,6 +462,7 @@
     shared_libs: [
         "libbase",
         "liblog",
+        "liblz4",
     ],
     static_libs: [
         "libpayload_consumer",
@@ -554,6 +557,7 @@
         "libcow_size_estimator",
         "liberofs",
         "lz4diff-protos",
+        "liblz4diff",
     ],
     shared_libs: [
         "libbase",
@@ -598,6 +602,51 @@
     ],
 }
 
+cc_defaults {
+    name: "liblz4diff_defaults",
+    static_libs: [
+        "lz4diff-protos",
+        "update_metadata-protos",
+        "libssl",
+        "libbsdiff",
+        "libpuffdiff",
+    ],
+    shared_libs: [
+        "liblz4",
+    ]
+}
+
+cc_library_static {
+    name: "liblz4diff",
+    host_supported: true,
+    defaults: ["ue_defaults", "liblz4diff_defaults"],
+    srcs: [
+        "lz4diff/lz4patch.cc",
+        "lz4diff/lz4diff_compress.cc",
+    ],
+}
+
+cc_library_static {
+    name: "liblz4patch",
+    host_supported: true,
+    recovery_available: true,
+    defaults: ["ue_defaults"],
+    static_libs: [
+        "lz4diff-protos",
+        "update_metadata-protos",
+        "libssl",
+        "libbspatch",
+        "libpuffpatch",
+    ],
+    shared_libs: [
+        "liblz4",
+    ],
+    srcs: [
+        "lz4diff/lz4patch.cc",
+        "lz4diff/lz4diff_compress.cc",
+    ],
+}
+
 cc_library_static {
     name: "libpayload_generator",
     defaults: [
@@ -618,8 +667,6 @@
         "payload_generator/deflate_utils.cc",
         "payload_generator/delta_diff_generator.cc",
         "payload_generator/delta_diff_utils.cc",
-        "lz4diff/lz4diff.cc",
-        "lz4diff/lz4diff_compress.cc",
         "payload_generator/ext2_filesystem.cc",
         "payload_generator/erofs_filesystem.cc",
         "payload_generator/extent_ranges.cc",
diff --git a/common/utils.cc b/common/utils.cc
index 8b57075..002e6a0 100644
--- a/common/utils.cc
+++ b/common/utils.cc
@@ -567,7 +567,7 @@
                      const string& fs_mount_options) {
   vector<const char*> fstypes;
   if (type.empty()) {
-    fstypes = {"ext2", "ext3", "ext4", "squashfs"};
+    fstypes = {"ext2", "ext3", "ext4", "squashfs", "erofs"};
   } else {
     fstypes = {type.c_str()};
   }
@@ -931,17 +931,34 @@
   }
   return true;
 }
+bool ReadExtents(const std::string& path,
+                 const vector<Extent>& extents,
+                 brillo::Blob* out_data,
+                 ssize_t out_data_size,
+                 size_t block_size) {
+  FileDescriptorPtr fd = std::make_shared<EintrSafeFileDescriptor>();
+  fd->Open(path.c_str(), O_RDONLY);
+  return ReadExtents(fd, extents, out_data, out_data_size, block_size);
+}
 
-bool ReadExtents(const string& path,
+bool ReadExtents(FileDescriptorPtr fd,
+                 const google::protobuf::RepeatedPtrField<Extent>& extents,
+                 brillo::Blob* out_data,
+                 size_t block_size) {
+  return ReadExtents(fd,
+                     {extents.begin(), extents.end()},
+                     out_data,
+                     utils::BlocksInExtents(extents) * block_size,
+                     block_size);
+}
+
+bool ReadExtents(FileDescriptorPtr fd,
                  const vector<Extent>& extents,
                  brillo::Blob* out_data,
                  ssize_t out_data_size,
                  size_t block_size) {
   brillo::Blob data(out_data_size);
   ssize_t bytes_read = 0;
-  int fd = open(path.c_str(), O_RDONLY);
-  TEST_AND_RETURN_FALSE_ERRNO(fd >= 0);
-  ScopedFdCloser fd_closer(&fd);
 
   for (const Extent& extent : extents) {
     ssize_t bytes_read_this_iteration = 0;
diff --git a/common/utils.h b/common/utils.h
index a0a466d..46e41f0 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -312,6 +312,12 @@
                  ssize_t out_data_size,
                  size_t block_size);
 
+bool ReadExtents(FileDescriptorPtr path,
+                 const std::vector<Extent>& extents,
+                 brillo::Blob* out_data,
+                 ssize_t out_data_size,
+                 size_t block_size);
+
 bool WriteExtents(const std::string& path,
                   const google::protobuf::RepeatedPtrField<Extent>& extents,
                   const brillo::Blob& data,
@@ -333,6 +339,11 @@
                  brillo::Blob* out_data,
                  size_t block_size);
 
+bool ReadExtents(FileDescriptorPtr path,
+                 const google::protobuf::RepeatedPtrField<Extent>& extents,
+                 brillo::Blob* out_data,
+                 size_t block_size);
+
 // Read the current boot identifier and store it in |boot_id|. This identifier
 // is constants during the same boot of the kernel and is regenerated after
 // reboot. Returns whether it succeeded getting the boot_id.
diff --git a/lz4diff/lz4diff.cc b/lz4diff/lz4diff.cc
index 5525dd1..3e4d3b4 100644
--- a/lz4diff/lz4diff.cc
+++ b/lz4diff/lz4diff.cc
@@ -165,6 +165,8 @@
 static void StoreSrcCompressedFileInfo(const CompressedFile& src_file_info,
                                        Lz4diffHeader* header) {
   *header->mutable_src_info()->mutable_algo() = src_file_info.algo;
+  header->mutable_src_info()->set_zero_padding_enabled(
+      src_file_info.zero_padding_enabled);
   auto& src_blocks = *header->mutable_src_info()->mutable_block_info();
   src_blocks.Clear();
   for (const auto& block : src_file_info.blocks) {
diff --git a/lz4diff/lz4patch.cc b/lz4diff/lz4patch.cc
new file mode 100644
index 0000000..1dcf933
--- /dev/null
+++ b/lz4diff/lz4patch.cc
@@ -0,0 +1,322 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "lz4patch.h"
+
+#include <endian.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <algorithm>
+#include <string_view>
+
+#include <bsdiff/bspatch.h>
+#include <bsdiff/memory_file.h>
+#include <bsdiff/file.h>
+#include <puffin/memory_stream.h>
+
+#include "android-base/strings.h"
+#include "lz4diff/lz4diff.h"
+#include "lz4diff_compress.h"
+#include "lz4diff_format.h"
+#include "puffin/puffpatch.h"
+#include "update_engine/common/hash_calculator.h"
+#include "update_engine/common/utils.h"
+
+namespace chromeos_update_engine {
+
+namespace {
+
+template <typename T>
+constexpr void BigEndianToHost(T& t) {
+  static_assert(std::is_integral_v<T>);
+  static_assert(sizeof(t) == 4 || sizeof(t) == 8 || sizeof(t) == 2);
+  if constexpr (sizeof(t) == 4) {
+    t = be32toh(t);
+  } else if constexpr (sizeof(t) == 8) {
+    t = be64toh(t);
+  } else if constexpr (sizeof(t) == 2) {
+    t = be16toh(t);
+  }
+}
+
+// In memory representation of an LZ4Diff patch, it's not marked as packed
+// because parsing isn't as simple as reinterpret_cast<> any way.
+struct Lz4diffPatch {
+  char magic[kLz4diffMagic.size()];
+  uint32_t version;
+  uint32_t pb_header_size;  // size of protobuf message
+  Lz4diffHeader pb_header;
+  std::string_view inner_patch;
+};
+
+// Utility class to interact with puffin API. C++ does not have standard
+// Read/Write trait. So everybody invent their own file descriptor wrapper.
+class StringViewStream : public puffin::StreamInterface {
+ public:
+  ~StringViewStream() override = default;
+
+  bool GetSize(uint64_t* size) const override {
+    *size = read_memory_.size();
+    return true;
+  }
+
+  bool GetOffset(uint64_t* offset) const override {
+    *offset = offset_;
+    return true;
+  }
+
+  bool Seek(uint64_t offset) override {
+    TEST_AND_RETURN_FALSE(open_);
+    uint64_t size;
+    GetSize(&size);
+    TEST_AND_RETURN_FALSE(offset <= size);
+    offset_ = offset;
+    return true;
+  }
+
+  bool Read(void* buffer, size_t length) override {
+    TEST_AND_RETURN_FALSE(open_);
+    TEST_AND_RETURN_FALSE(offset_ + length <= read_memory_.size());
+    memcpy(buffer, read_memory_.data() + offset_, length);
+    offset_ += length;
+    return true;
+  }
+
+  bool Write(const void* buffer, size_t length) override {
+    LOG(ERROR) << "Unsupported operation " << __FUNCTION__;
+    return false;
+  }
+
+  bool Close() override {
+    open_ = false;
+    return true;
+  }
+
+  constexpr StringViewStream(std::string_view read_memory)
+      : read_memory_(read_memory) {
+    CHECK(!read_memory.empty());
+  }
+
+ private:
+  // The memory buffer for reading.
+  std::string_view read_memory_;
+
+  // The current offset.
+  uint64_t offset_{};
+  bool open_{true};
+};
+
+bool ParseLz4DifffPatch(std::string_view patch_data, Lz4diffPatch* output) {
+  CHECK_NE(output, nullptr);
+  if (!android::base::StartsWith(patch_data, kLz4diffMagic)) {
+    LOG(ERROR) << "Invalid lz4diff magic: "
+               << HexEncode(patch_data.substr(0, kLz4diffMagic.size()))
+               << ", expected: " << HexEncode(kLz4diffMagic);
+    return false;
+  }
+  Lz4diffPatch& patch = *output;
+  std::memcpy(patch.magic, patch_data.data(), kLz4diffMagic.size());
+  std::memcpy(&patch.version,
+              patch_data.data() + kLz4diffMagic.size(),
+              sizeof(patch.version));
+  BigEndianToHost(patch.version);
+  if (patch.version != kLz4diffVersion) {
+    LOG(ERROR) << "Unsupported lz4diff version: " << patch.version
+               << ", supported version: " << kLz4diffVersion;
+    return false;
+  }
+  std::memcpy(&patch.pb_header_size,
+              patch_data.data() + kLz4diffMagic.size() + sizeof(patch.version),
+              sizeof(patch.pb_header_size));
+  BigEndianToHost(patch.pb_header_size);
+  TEST_AND_RETURN_FALSE(patch.pb_header.ParseFromArray(
+      patch_data.data() + kLz4diffHeaderSize, patch.pb_header_size));
+  patch.inner_patch =
+      patch_data.substr(kLz4diffHeaderSize + patch.pb_header_size);
+  return true;
+}
+
+bool bspatch(std::string_view input_data,
+             std::string_view patch_data,
+             Blob* output) {
+  CHECK_NE(output, nullptr);
+  output->clear();
+  CHECK_GT(patch_data.size(), 0UL);
+  int err =
+      bsdiff::bspatch(reinterpret_cast<const uint8_t*>(input_data.data()),
+                      input_data.size(),
+                      reinterpret_cast<const uint8_t*>(patch_data.data()),
+                      patch_data.size(),
+                      [output](const uint8_t* data, size_t size) -> size_t {
+                        output->insert(output->end(), data, data + size);
+                        return size;
+                      });
+  return err == 0;
+}
+
+bool ApplyPostfixPatch(
+    std::string_view recompressed_blob,
+    const google::protobuf::RepeatedPtrField<CompressedBlockInfo>&
+        dst_block_info,
+    Blob* output) {
+  // Output size should be always identical to size of recompressed_blob
+  output->clear();
+  output->reserve(recompressed_blob.size());
+  size_t offset = 0;
+  for (const auto& block_info : dst_block_info) {
+    auto block =
+        recompressed_blob.substr(offset, block_info.compressed_length());
+    if (!block_info.sha256_hash().empty()) {
+      Blob actual_hash;
+      CHECK(HashCalculator::RawHashOfBytes(
+          block.data(), block.size(), &actual_hash));
+      if (ToStringView(actual_hash) != block_info.sha256_hash()) {
+        LOG(ERROR) << "Block " << block_info
+                   << " is corrupted. This usually means the patch generator "
+                      "used a different version of LZ4, or an incompatible LZ4 "
+                      "patch generator was used, or LZ4 produces different "
+                      "output on different platforms. Expected hash: "
+                   << HexEncode(block_info.sha256_hash())
+                   << ", actual hash: " << HexEncode(actual_hash);
+      }
+    }
+    if (!block_info.postfix_bspatch().empty()) {
+      Blob fixed_block;
+      TEST_AND_RETURN_FALSE(
+          bspatch(block, block_info.postfix_bspatch(), &fixed_block));
+      output->insert(output->end(), fixed_block.begin(), fixed_block.end());
+    } else {
+      output->insert(output->end(), block.begin(), block.end());
+    }
+    offset += block_info.compressed_length();
+  }
+  return true;
+}
+
+bool puffpatch(std::string_view input_data,
+               std::string_view patch_data,
+               Blob* output) {
+  return puffin::PuffPatch(std::make_unique<StringViewStream>(input_data),
+                           puffin::MemoryStream::CreateForWrite(output),
+                           reinterpret_cast<const uint8_t*>(patch_data.data()),
+                           patch_data.size());
+}
+
+std::vector<CompressedBlock> ToCompressedBlockVec(
+    const google::protobuf::RepeatedPtrField<CompressedBlockInfo>& rpf) {
+  std::vector<CompressedBlock> ret;
+  for (const auto& block : rpf) {
+    auto& info = ret.emplace_back();
+    info.compressed_length = block.compressed_length();
+    info.uncompressed_length = block.uncompressed_length();
+    info.uncompressed_offset = block.uncompressed_offset();
+  }
+  return ret;
+}
+
+bool HasPosfixPatches(const Lz4diffPatch& patch) {
+  for (const auto& info : patch.pb_header.dst_info().block_info()) {
+    if (!info.postfix_bspatch().empty()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+bool Lz4Patch(std::string_view src_data,
+              std::string_view patch_data,
+              Blob* output) {
+  Lz4diffPatch patch;
+  TEST_AND_RETURN_FALSE(ParseLz4DifffPatch(patch_data, &patch));
+
+  Blob decompressed_dst;
+  // This scope is here just so that |decompressed_src| can be freed earlier
+  // than function scope.
+  // This whole patching algorithm has non-trivial memory usage, as it needs to
+  // load source data in to memory and decompress that. Now both src and
+  // decompressed src data are in memory.
+  // TODO(b/206729162) Make lz4diff more memory efficient and more streaming
+  // friendly.
+  {
+    const auto decompressed_src = TryDecompressBlob(
+        src_data,
+        ToCompressedBlockVec(patch.pb_header.src_info().block_info()),
+        patch.pb_header.src_info().zero_padding_enabled());
+    switch (patch.pb_header.inner_type()) {
+      case InnerPatchType::BSDIFF:
+        TEST_AND_RETURN_FALSE(bspatch(ToStringView(decompressed_src),
+                                      patch.inner_patch,
+                                      &decompressed_dst));
+        break;
+      case InnerPatchType::PUFFDIFF:
+        TEST_AND_RETURN_FALSE(puffpatch(ToStringView(decompressed_src),
+                                        patch.inner_patch,
+                                        &decompressed_dst));
+        break;
+      default:
+        LOG(ERROR) << "Unsupported patch type: "
+                   << patch.pb_header.inner_type();
+        return false;
+    }
+  }
+
+  auto recompressed_dst = TryCompressBlob(
+      ToStringView(decompressed_dst),
+      ToCompressedBlockVec(patch.pb_header.dst_info().block_info()),
+      patch.pb_header.dst_info().zero_padding_enabled(),
+      patch.pb_header.dst_info().algo());
+  TEST_AND_RETURN_FALSE(recompressed_dst.size() > 0);
+  // free memory used by |decompressed_dst|.
+  decompressed_dst = {};
+
+  if (HasPosfixPatches(patch)) {
+    TEST_AND_RETURN_FALSE(
+        ApplyPostfixPatch(ToStringView(recompressed_dst),
+                          patch.pb_header.dst_info().block_info(),
+                          output));
+  } else {
+    *output = std::move(recompressed_dst);
+  }
+
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& out, const CompressionAlgorithm& info) {
+  out << "Algo {type: " << info.Type_Name(info.type());
+  if (info.level() != 0) {
+    out << ", level: " << info.level();
+  }
+  out << "}";
+
+  return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const CompressionInfo& info) {
+  out << "CompressionInfo {block_info: " << info.block_info()
+      << ", algo: " << info.algo() << "}";
+  return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const Lz4diffHeader& header) {
+  out << "Lz4diffHeader {src_info: " << header.src_info()
+      << ", dst_info: " << header.dst_info() << "}";
+  return out;
+}
+
+}  // namespace chromeos_update_engine
diff --git a/lz4diff/lz4patch.h b/lz4diff/lz4patch.h
new file mode 100644
index 0000000..5610891
--- /dev/null
+++ b/lz4diff/lz4patch.h
@@ -0,0 +1,50 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef UPDATE_ENGINE_LZ4DIFF_LZ4PATCH_H_
+#define UPDATE_ENGINE_LZ4DIFF_LZ4PATCH_H_
+
+#include "lz4diff_format.h"
+
+namespace chromeos_update_engine {
+bool Lz4Patch(std::string_view src_data,
+              std::string_view patch_data,
+              Blob* output);
+
+std::ostream& operator<<(std::ostream& out, const Lz4diffHeader&);
+
+template <typename T>
+std::ostream& operator<<(std::ostream& out,
+                         const google::protobuf::RepeatedPtrField<T>& arr) {
+  if (arr.empty()) {
+    out << "[]";
+    return out;
+  }
+  out << "[";
+  auto begin = arr.begin();
+  out << *begin;
+  ++begin;
+  for (; begin != arr.end(); ++begin) {
+    out << ", " << *begin;
+  }
+  out << "]";
+
+  return out;
+}
+
+}  // namespace chromeos_update_engine
+
+#endif