Add parser for EROFS file data

This CL parses file names and compression info from the input EROFS image,
so that OTA generation can use them later.

Test: th
Test: Generate an incremental OTA with erofs images, make sure we see
file lists.
Bug: 206729162

Change-Id: I70536d7762acc19a6977460738065d5557338532
diff --git a/Android.bp b/Android.bp
index 19026ac..2a6b2a4 100644
--- a/Android.bp
+++ b/Android.bp
@@ -531,6 +531,7 @@
     defaults: [
         "libpayload_consumer_exports",
         "update_metadata-protos_exports",
+        "erofs-utils_export_defaults",
     ],
 
     header_libs: [
@@ -550,10 +551,15 @@
         "update_metadata-protos",
         "libpayload_extent_utils",
         "libcow_size_estimator",
+        "liberofs",
+        "lz4diff-protos",
     ],
     shared_libs: [
         "libbase",
         "libext2fs",
+        // LZ4 has to be a shared lib, as we want to override it at load time
+        // (via LD_PRELOAD / LD_LIBRARY_PATH) later
+        "liblz4",
     ],
 }
 
@@ -612,6 +618,7 @@
         "payload_generator/delta_diff_generator.cc",
         "payload_generator/delta_diff_utils.cc",
         "payload_generator/ext2_filesystem.cc",
+        "payload_generator/erofs_filesystem.cc",
         "payload_generator/extent_ranges.cc",
         "payload_generator/full_update_generator.cc",
         "payload_generator/mapfile_filesystem.cc",
diff --git a/lz4diff/lz4diff_format.h b/lz4diff/lz4diff_format.h
index 5b5ce40..9875c12 100644
--- a/lz4diff/lz4diff_format.h
+++ b/lz4diff/lz4diff_format.h
@@ -58,8 +58,12 @@
 };
 
 struct CompressedFile {
+  // Extents in this array should be in range [0, file_size]. They describe
+  // which bytes inside this file are compressed. Useful for compressed file
+  // systems like EROFS.
   std::vector<CompressedBlock> blocks;
   CompressionAlgorithm algo;
+  // Whether the EROFS zero padding feature is enabled
   bool zero_padding_enabled{};
 };
 
diff --git a/payload_consumer/filesystem_verifier_action.cc b/payload_consumer/filesystem_verifier_action.cc
index 0da3fba..a8b9269 100644
--- a/payload_consumer/filesystem_verifier_action.cc
+++ b/payload_consumer/filesystem_verifier_action.cc
@@ -36,7 +36,6 @@
 #include <brillo/streams/file_stream.h>
 
 #include "common/error_code.h"
-#include "payload_generator/delta_diff_generator.h"
 #include "update_engine/common/utils.h"
 #include "update_engine/payload_consumer/file_descriptor.h"
 
diff --git a/payload_generator/delta_diff_generator.h b/payload_generator/delta_diff_generator.h
index b11f30a..2ffca38 100644
--- a/payload_generator/delta_diff_generator.h
+++ b/payload_generator/delta_diff_generator.h
@@ -24,6 +24,7 @@
 namespace chromeos_update_engine {
 
 constexpr size_t kBlockSize = 4096;
+
 extern const size_t kRootFSPartitionSize;
 
 // The |config| describes the payload generation request, describing both
diff --git a/payload_generator/erofs_filesystem.cc b/payload_generator/erofs_filesystem.cc
new file mode 100644
index 0000000..9ab37fd
--- /dev/null
+++ b/payload_generator/erofs_filesystem.cc
@@ -0,0 +1,240 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "update_engine/payload_generator/erofs_filesystem.h"
+
+#include <time.h>
+
+#include <string>
+#include <mutex>
+
+#include <erofs/internal.h>
+#include <erofs/dir.h>
+#include <erofs/io.h>
+
+#include "erofs_iterate.h"
+#include "lz4diff/lz4diff.pb.h"
+#include "payload_generator/filesystem_interface.h"
+#include "update_engine/common/utils.h"
+#include "update_engine/payload_generator/delta_diff_generator.h"
+#include "update_engine/payload_generator/extent_ranges.h"
+#include "update_engine/payload_generator/extent_utils.h"
+
+namespace chromeos_update_engine {
+
+namespace {
+
+// Stores in |size| the bytes |inode|'s data occupies; 0 on success, else -1.
+static constexpr int GetOccupiedSize(const struct erofs_inode* inode,
+                                     erofs_off_t* size) {
+  switch (inode->datalayout) {
+    case EROFS_INODE_FLAT_INLINE:
+    case EROFS_INODE_FLAT_PLAIN:
+    case EROFS_INODE_CHUNK_BASED:
+      *size = inode->i_size;
+      return 0;
+    case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+    case EROFS_INODE_FLAT_COMPRESSION:
+      *size = inode->u.i_blocks * EROFS_BLKSIZ;
+      return 0;
+    default:
+      *size = 0;
+      LOG(ERROR) << "unknown datalayout " << inode->datalayout;
+      return -1;
+  }
+}
+
+// Dispatches to the compressed or the plain EROFS block mapper for |inode|.
+static int ErofsMapBlocks(struct erofs_inode* inode,
+                          struct erofs_map_blocks* map,
+                          int flags) {
+  const bool compressed = erofs_inode_is_data_compressed(inode->datalayout);
+  return compressed ? z_erofs_map_blocks_iter(inode, map, flags)
+                    : erofs_map_blocks(inode, map, flags);
+}
+
+static constexpr bool IsBlockCompressed(const struct erofs_map_blocks& block) {
+  // An encoded block whose format is Z_EROFS_COMPRESSION_SHIFTED was merely
+  // memmove()'ed in place rather than run through a real compressor such as
+  // LZ4 or LZMA, so it does not count as compressed.
+  const bool encoded = block.m_flags & EROFS_MAP_ENCODED;
+  return encoded && block.m_algorithmformat != Z_EROFS_COMPRESSION_SHIFTED;
+}
+
+// Maps |inode|'s data with erofs-utils and records its physical extents and
+// per-block compression layout in |p_file|. No-op for uncompressed files.
+static void FillCompressedBlockInfo(FilesystemInterface::File* p_file,
+                                    std::string_view image_filename,
+                                    struct erofs_inode* inode) {
+  auto& file = *p_file;
+  if (!file.is_compressed) {
+    return;
+  }
+  // TODO(b/206729162) Fill in compression algorithm info from target files
+  file.compressed_file_info.algo.set_type(CompressionAlgorithm::LZ4HC);
+  file.compressed_file_info.algo.set_level(9);
+
+  struct erofs_map_blocks block {};
+  block.m_la = 0;
+  block.index = UINT_MAX;
+
+  const erofs_off_t uncompressed_size = file.file_stat.st_size;
+  auto& compressed_blocks = file.compressed_file_info.blocks;
+  auto last_pa = block.m_pa;
+  decltype(block.m_plen) last_plen = 0;  // not int: avoids narrowing m_plen
+  while (block.m_la < uncompressed_size) {
+    auto error = ErofsMapBlocks(inode, &block, EROFS_GET_BLOCKS_FIEMAP);
+    if (error) {
+      LOG(FATAL) << "Failed to map blocks for " << file.name << " in "
+                 << image_filename;
+    }
+    // Certain uncompressed blocks have physical size > logical size. Usually
+    // the physical block contains a bunch of trailing zeros. Include these
+    // bytes in the logical size as well.
+    if (!IsBlockCompressed(block)) {
+      CHECK_LE(block.m_llen, block.m_plen);
+      block.m_llen = block.m_plen;
+    }
+
+    if (last_pa + last_plen != block.m_pa) {
+      if (last_plen != 0) {
+        file.extents.push_back(ExtentForRange(
+            last_pa / kBlockSize, utils::DivRoundUp(last_plen, kBlockSize)));
+      }
+      last_pa = block.m_pa;
+      last_plen = block.m_plen;
+    } else {
+      last_plen += block.m_plen;
+    }
+    // If logical size and physical size are the same, this block is
+    // uncompressed. Join consecutive uncompressed blocks to save a bit of
+    // memory storing metadata.
+    if (block.m_llen == block.m_plen && !compressed_blocks.empty() &&
+        !compressed_blocks.back().IsCompressed()) {
+      compressed_blocks.back().compressed_length += block.m_llen;
+      compressed_blocks.back().uncompressed_length += block.m_llen;
+    } else {
+      compressed_blocks.push_back(
+          CompressedBlock(block.m_la, block.m_plen, block.m_llen));
+    }
+
+    block.m_la += block.m_llen;
+  }
+  file.extents.push_back(ExtentForRange(
+      last_pa / kBlockSize, utils::DivRoundUp(last_plen, kBlockSize)));
+}
+
+}  // namespace
+
+static_assert(kBlockSize == EROFS_BLKSIZ);  // used interchangeably above
+
+std::unique_ptr<ErofsFilesystem> ErofsFilesystem::CreateFromFile(
+    const std::string& filename) {
+  // erofs-utils keeps its state in globals (for instance, a single global int
+  // holds the file descriptor of the opened EROFS image), so its functions
+  // are not thread safe and only one image can be open at a time. Serialize
+  // every caller behind one function-local static mutex.
+  // TODO(b/202784930) Replace erofs-utils with a cleaner and more C++ friendly
+  // library. (Or turn erofs-utils into one)
+  static std::mutex erofs_mutex;
+  std::lock_guard lock{erofs_mutex};
+
+  if (dev_open_ro(filename.c_str()) != 0) {
+    PLOG(INFO) << "Failed to open " << filename;
+    return nullptr;
+  }
+  DEFER { dev_close(); };
+
+  if (erofs_read_superblock() != 0) {
+    PLOG(INFO) << "Failed to parse " << filename << " as EROFS image";
+    return nullptr;
+  }
+  struct stat image_stat;
+  if (fstat(erofs_devfd, &image_stat) != 0) {
+    PLOG(ERROR) << "Failed to stat() " << filename;
+    return nullptr;
+  }
+  const time_t build_time = sbi.build_time;
+  LOG(INFO) << "Parsed EROFS image of size " << image_stat.st_size
+            << " built in " << ctime(&build_time) << " " << filename;
+  std::vector<File> parsed_files;
+  if (!ErofsFilesystem::GetFiles(filename, &parsed_files)) {
+    return nullptr;
+  }
+  // std::make_unique cannot reach the private constructor, so use new.
+  return std::unique_ptr<ErofsFilesystem>(new ErofsFilesystem(
+      filename, image_stat.st_size, std::move(parsed_files)));
+}
+
+bool ErofsFilesystem::GetFiles(std::vector<File>* files) const {
+  files->assign(files_.cbegin(), files_.cend());  // copy of the stored list
+  return true;
+}
+
+// Walks the EROFS image described by the erofs-utils global |sbi| and appends
+// one File per non-empty, non-inline regular file. False if the walk fails.
+bool ErofsFilesystem::GetFiles(const std::string& filename,
+                               std::vector<File>* files) {
+  const auto visit = [&](struct erofs_iterate_dir_context* p_info) {
+    const auto& info = *p_info;
+    if (info.ctx.de_ftype != EROFS_FT_REG_FILE) {
+      return 0;
+    }
+    struct erofs_inode inode {};  // zero-init, like erofs_iterate.h does
+    inode.nid = info.ctx.de_nid;
+    int err = erofs_read_inode_from_disk(&inode);
+    if (err) {
+      LOG(ERROR) << "Failed to read inode " << inode.nid;
+      return err;
+    }
+    const auto uncompressed_size = inode.i_size;
+    erofs_off_t compressed_size = 0;
+    if (uncompressed_size == 0) {
+      return 0;
+    }
+    err = GetOccupiedSize(&inode, &compressed_size);
+    if (err) {
+      LOG(FATAL) << "Failed to get occupied size for " << filename;
+      return err;
+    }
+    // Inline data is likely not block aligned, which OTA can't handle yet, so
+    // skip it; blocks not reported by |GetFiles| are updated in 1 operation.
+    // TODO(b/206729162) Support un-aligned files.
+    if (inode.datalayout == EROFS_INODE_FLAT_INLINE) {
+      return 0;
+    }
+    File file;
+    file.name = info.path;
+    file.compressed_file_info.zero_padding_enabled =
+        erofs_sb_has_lz4_0padding();
+    file.is_compressed = compressed_size != uncompressed_size;
+    file.file_stat.st_size = uncompressed_size;
+    file.file_stat.st_ino = inode.nid;
+    FillCompressedBlockInfo(&file, filename, &inode);
+    files->emplace_back(std::move(file));
+    return 0;
+  };
+  if (erofs_iterate_root_dir(&sbi, visit) != 0) {
+    LOG(ERROR) << "Failed to list files in EROFS image " << filename;
+    return false;
+  }
+  for (auto& file : *files) {
+    NormalizeExtents(&file.extents);
+  }
+  return true;
+}
+
+}  // namespace chromeos_update_engine
\ No newline at end of file
diff --git a/payload_generator/erofs_filesystem.h b/payload_generator/erofs_filesystem.h
new file mode 100644
index 0000000..473c609
--- /dev/null
+++ b/payload_generator/erofs_filesystem.h
@@ -0,0 +1,69 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_EROFS_FILESYSTEM_H_
+#define UPDATE_ENGINE_PAYLOAD_GENERATOR_EROFS_FILESYSTEM_H_
+
+#include "update_engine/payload_generator/filesystem_interface.h"
+#include "update_engine/payload_generator/delta_diff_generator.h"
+
+namespace chromeos_update_engine {
+
+// FilesystemInterface implementation that lists the files of an EROFS image.
+class ErofsFilesystem final : public FilesystemInterface {
+ public:
+  // Creates an ErofsFilesystem from an EROFS formatted filesystem stored in a
+  // file. The file doesn't need to be loop-back mounted. Returns nullptr if
+  // |filename| cannot be opened or parsed as an EROFS image.
+  static std::unique_ptr<ErofsFilesystem> CreateFromFile(
+      const std::string& filename);
+  ~ErofsFilesystem() override = default;
+
+  // FilesystemInterface overrides.
+  size_t GetBlockSize() const override { return kBlockSize; }
+  size_t GetBlockCount() const override { return fs_size_ / kBlockSize; }
+
+  // Appends to |files| one File for every non-empty regular file in the image,
+  // skipping files whose data is stored inline (EROFS_INODE_FLAT_INLINE).
+  // Directories and other entry types are not listed and no pseudo-files are
+  // generated. Relies on the erofs-utils global state that CreateFromFile()
+  // sets up; |filename| is only used in log messages.
+  static bool GetFiles(const std::string& filename, std::vector<File>* files);
+
+  // Returns in |files| a copy of the list computed by CreateFromFile().
+  bool GetFiles(std::vector<File>* files) const override;
+
+  // No settings are loaded for EROFS; always returns true.
+  bool LoadSettings(
+      [[maybe_unused]] brillo::KeyValueStore* store) const override {
+    return true;
+  }
+
+ private:
+  ErofsFilesystem(std::string filename, size_t fs_size, std::vector<File> files)
+      : filename_(std::move(filename)),
+        fs_size_(fs_size),
+        files_(std::move(files)) {}
+
+  // The file where the filesystem is stored.
+  const std::string filename_;
+  const size_t fs_size_;           // size of that file in bytes
+  const std::vector<File> files_;  // files found when the image was parsed
+};
+
+}  // namespace chromeos_update_engine
+
+#endif
\ No newline at end of file
diff --git a/payload_generator/erofs_iterate.h b/payload_generator/erofs_iterate.h
new file mode 100644
index 0000000..ac9d67e
--- /dev/null
+++ b/payload_generator/erofs_iterate.h
@@ -0,0 +1,76 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <string>
+
+#include <erofs/dir.h>
+
+#include "update_engine/common/utils.h"
+
+// Wraps erofs_dir_context so the iteration callback can reach extra state.
+// NOTE(review): this header has no include guard; add one before wider use.
+struct erofs_iterate_dir_context {
+  struct erofs_dir_context ctx;
+  std::string path;  // path of the entry being visited, built by the walker
+  void* arg;         // address of the user-supplied callable
+};
+
+// Dear compiler, please don't reorder fields inside erofs_iterate_dir_context.
+// EROFS expects us to pass a wrapper type, so the |ctx| member of
+// erofs_iterate_dir_context must be placed at offset 0.
+static_assert(offsetof(erofs_iterate_dir_context, ctx) == 0);
+
+// Walks every entry under the root directory of the image described by |sbi|,
+// calling cb(erofs_iterate_dir_context*) -> int for each one. A non-zero result
+// from |cb| or erofs-utils stops the walk and is returned.
+template <typename Callable>
+int erofs_iterate_root_dir(const struct erofs_sb_info* sbi, Callable cb) {
+  erofs_inode dir{.nid = sbi->root_nid};
+  int err = erofs_read_inode_from_disk(&dir);
+  if (err) {
+    LOG(ERROR) << "Failed to read inode " << sbi->root_nid << " from disk";
+    return err;
+  }
+  struct erofs_iterate_dir_context param {
+    .ctx.dir = &dir, .ctx.pnid = sbi->root_nid,
+    .ctx.cb = [](struct erofs_dir_context* arg) -> int {
+      auto ctx = reinterpret_cast<erofs_iterate_dir_context*>(arg);
+      auto& path = ctx->path;
+      const auto len = path.size();
+      path.push_back('/');
+      path.insert(
+          path.end(), ctx->ctx.dname, ctx->ctx.dname + ctx->ctx.de_namelen);
+      auto cb = static_cast<Callable*>(ctx->arg);
+      int err = (*cb)(ctx);
+      if (!err && !ctx->ctx.dot_dotdot && ctx->ctx.de_ftype == EROFS_FT_DIR) {
+        // Recurse into the subdirectory, then point |ctx| back at the parent:
+        // |subdir| dies with this scope and the parent walk is still running.
+        erofs_inode subdir{.nid = ctx->ctx.de_nid};
+        err = erofs_read_inode_from_disk(&subdir);
+        if (!err) {
+          auto* const parent = ctx->ctx.dir;
+          ctx->ctx.dir = &subdir;
+          err = erofs_iterate_dir(&ctx->ctx, false);
+          ctx->ctx.dir = parent;
+        }
+      }
+      path.resize(len);
+      return err;
+    },
+    .arg = &cb,
+  };
+  return erofs_iterate_dir(&param.ctx, false);
+}
diff --git a/payload_generator/filesystem_interface.h b/payload_generator/filesystem_interface.h
index 05d387f..2d2846d 100644
--- a/payload_generator/filesystem_interface.h
+++ b/payload_generator/filesystem_interface.h
@@ -31,10 +31,10 @@
 #include <string>
 #include <vector>
 
-#include <base/macros.h>
 #include <brillo/key_value_store.h>
 #include <puffin/utils.h>
 
+#include "update_engine/lz4diff/lz4diff_format.h"
 #include "update_engine/update_metadata.pb.h"
 
 namespace chromeos_update_engine {
@@ -72,6 +72,8 @@
     // All the deflate locations in the file. These locations are not relative
     // to the extents. They are relative to the file system itself.
     std::vector<puffin::BitExtent> deflates;
+
+    CompressedFile compressed_file_info;
   };
 
   virtual ~FilesystemInterface() = default;
diff --git a/payload_generator/payload_generation_config.cc b/payload_generator/payload_generation_config.cc
index cbdecef..bb8f64f 100644
--- a/payload_generator/payload_generation_config.cc
+++ b/payload_generator/payload_generation_config.cc
@@ -32,6 +32,7 @@
 #include "update_engine/payload_generator/boot_img_filesystem.h"
 #include "update_engine/payload_generator/delta_diff_generator.h"
 #include "update_engine/payload_generator/delta_diff_utils.h"
+#include "update_engine/payload_generator/erofs_filesystem.h"
 #include "update_engine/payload_generator/ext2_filesystem.h"
 #include "update_engine/payload_generator/mapfile_filesystem.h"
 #include "update_engine/payload_generator/raw_filesystem.h"
@@ -75,6 +76,11 @@
       return true;
     }
   }
+  fs_interface = ErofsFilesystem::CreateFromFile(path);
+  if (fs_interface) {
+    TEST_AND_RETURN_FALSE(fs_interface->GetBlockSize() == kBlockSize);
+    return true;
+  }
 
   if (!mapfile_path.empty()) {
     fs_interface = MapfileFilesystem::CreateFromFile(path, mapfile_path);