Add std::map implementation for cd entry map

Add the std::map implementation to be used later in zip64 format.
Also move the entry map classes to a separate file to make the hierarchy
clear.

Test: unittests pass
Change-Id: I74d95f53207cc8ca871b955e2a15c184d5497833
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 4451507..34a9c54 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -107,15 +107,15 @@
 }
 
 // Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
-std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
-                                                               const uint8_t* start) const {
+std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
+                                                                const uint8_t* start) const {
   const uint32_t hash = ComputeHash(name);
 
   // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
   uint32_t ent = hash & (hash_table_size_ - 1);
   while (hash_table_[ent].name_offset != 0) {
     if (hash_table_[ent].ToStringView(start) == name) {
-      return {0, hash_table_[ent].name_offset};
+      return {kSuccess, hash_table_[ent].name_offset};
     }
     ent = (ent + 1) & (hash_table_size_ - 1);
   }
@@ -124,7 +124,7 @@
   return {kEntryNotFound, 0};
 }
 
-int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
+ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
   const uint64_t hash = ComputeHash(name);
   uint32_t ent = hash & (hash_table_size_ - 1);
 
@@ -145,7 +145,7 @@
   const char* start_char = reinterpret_cast<const char*>(start);
   hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
   hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
-  return 0;
+  return kSuccess;
 }
 
 void CdEntryMapZip32::ResetIteration() {
@@ -166,6 +166,11 @@
 }
 
 CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
+  /*
+   * Create hash table.  We have a minimum 75% load factor, possibly as
+   * low as 50% after we round off to a power of 2.  There must be at
+   * least one unused entry to avoid an infinite loop during creation.
+   */
   hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
   hash_table_ = {
       reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
@@ -179,6 +184,43 @@
   return std::unique_ptr<CdEntryMapInterface>(entry_map);
 }
 
+std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
+  return std::unique_ptr<CdEntryMapInterface>(new CdEntryMapZip64());
+}
+
+ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
+  const auto [it, added] =
+      entry_table_.insert({name, name.data() - reinterpret_cast<const char*>(start)});
+  if (!added) {
+    ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
+    return kDuplicateEntry;
+  }
+  return kSuccess;
+}
+
+std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
+                                                                const uint8_t* /*cd_start*/) const {
+  const auto it = entry_table_.find(name);
+  if (it == entry_table_.end()) {
+    ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
+    return {kEntryNotFound, 0};
+  }
+
+  return {kSuccess, it->second};
+}
+
+void CdEntryMapZip64::ResetIteration() {
+  iterator_ = entry_table_.begin();
+}
+
+std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
+  if (iterator_ == entry_table_.end()) {
+    return {};
+  }
+
+  return *iterator_++;
+}
+
 #if defined(__BIONIC__)
 uint64_t GetOwnerTag(const ZipArchive* archive) {
   return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
@@ -357,12 +399,12 @@
   const size_t cd_length = archive->central_directory.GetMapLength();
   const uint16_t num_entries = archive->num_entries;
 
-  /*
-   * Create hash table.  We have a minimum 75% load factor, possibly as
-   * low as 50% after we round off to a power of 2.  There must be at
-   * least one unused entry to avoid an infinite loop during creation.
-   */
-  archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+  // TODO(xunchang) parse the zip64 Eocd
+  if (num_entries > UINT16_MAX) {
+    archive->cd_entry_map = CdEntryMapZip64::Create();
+  } else {
+    archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+  }
   if (archive->cd_entry_map == nullptr) {
     return kAllocationFailed;
   }
diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h
index 68977f6..ecb9f22 100644
--- a/libziparchive/zip_archive_private.h
+++ b/libziparchive/zip_archive_private.h
@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <map>
 #include <memory>
 #include <utility>
 #include <vector>
@@ -46,7 +47,9 @@
     "Allocation failed",
 };
 
-enum ErrorCodes : int32_t {
+enum ZipError : int32_t {
+  kSuccess = 0,
+
   kIterationEnd = -1,
 
   // We encountered a Zlib error when inflating a stream from this file.
@@ -149,12 +152,12 @@
   // Adds an entry to the map. The |name| should internally points to the
   // filename field of a cd entry. And |start| points to the beginning of the
   // central directory. Returns 0 on success.
-  virtual int32_t AddToMap(std::string_view name, const uint8_t* start) = 0;
+  virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
   // For the zip entry |entryName|, finds the offset of its filename field in
   // the central directory. Returns a pair of [status, offset]. The value of
   // the status is 0 on success.
-  virtual std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
-                                                        const uint8_t* cd_start) const = 0;
+  virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+                                                         const uint8_t* cd_start) const = 0;
   // Resets the iterator to the beginning of the map.
   virtual void ResetIteration() = 0;
   // Returns the [name, cd offset] of the current element. Also increments the
@@ -190,9 +193,9 @@
  public:
   static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
 
-  int32_t AddToMap(std::string_view name, const uint8_t* start) override;
-  std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
-                                                const uint8_t* cd_start) const override;
+  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
+  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+                                                 const uint8_t* cd_start) const override;
   void ResetIteration() override;
   std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
 
@@ -210,6 +213,25 @@
   uint32_t current_position_{0};
 };
 
+// This implementation of CdEntryMap uses a std::map
+class CdEntryMapZip64 : public CdEntryMapInterface {
+ public:
+  static std::unique_ptr<CdEntryMapInterface> Create();
+
+  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
+  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+                                                 const uint8_t* cd_start) const override;
+  void ResetIteration() override;
+  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
+
+ private:
+  CdEntryMapZip64() = default;
+
+  std::map<std::string_view, uint64_t> entry_table_;
+
+  std::map<std::string_view, uint64_t>::iterator iterator_;
+};
+
 struct ZipArchive {
   // open Zip archive
   mutable MappedZipFile mapped_zip;
diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc
index 0916304..523d4c5 100644
--- a/libziparchive/zip_archive_test.cc
+++ b/libziparchive/zip_archive_test.cc
@@ -24,11 +24,14 @@
 #include <unistd.h>
 
 #include <memory>
+#include <set>
+#include <string_view>
 #include <vector>
 
 #include <android-base/file.h>
 #include <android-base/logging.h>
 #include <android-base/mapped_file.h>
+#include <android-base/strings.h>
 #include <android-base/unique_fd.h>
 #include <gtest/gtest.h>
 #include <ziparchive/zip_archive.h>
@@ -53,6 +56,76 @@
   return OpenArchive(abs_path.c_str(), handle);
 }
 
+class CdEntryMapTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    names_ = {
+        "a.txt", "b.txt", "b/", "b/c.txt", "b/d.txt",
+    };
+    separator_ = "separator";
+    header_ = "metadata";
+    joined_names_ = header_ + android::base::Join(names_, separator_);
+    base_ptr_ = reinterpret_cast<uint8_t*>(&joined_names_[0]);
+
+    entry_maps_.emplace_back(CdEntryMapZip32::Create(static_cast<uint16_t>(names_.size())));
+    entry_maps_.emplace_back(CdEntryMapZip64::Create());
+    for (auto& cd_map : entry_maps_) {
+      ASSERT_NE(nullptr, cd_map);
+      size_t offset = header_.size();
+      for (const auto& name : names_) {
+        auto status = cd_map->AddToMap(
+            std::string_view{joined_names_.c_str() + offset, name.size()}, base_ptr_);
+        ASSERT_EQ(0, status);
+        offset += name.size() + separator_.size();
+      }
+    }
+  }
+
+  std::vector<std::string> names_;
+  // A continuous region of memory serves as a mock of the central directory.
+  std::string joined_names_;
+  // We expect some metadata at the beginning of the central directory and between filenames.
+  std::string header_;
+  std::string separator_;
+
+  std::vector<std::unique_ptr<CdEntryMapInterface>> entry_maps_;
+  uint8_t* base_ptr_{nullptr};  // Points to the start of the central directory.
+};
+
+TEST_F(CdEntryMapTest, AddDuplicatedEntry) {
+  for (auto& cd_map : entry_maps_) {
+    std::string_view name = "b.txt";
+    ASSERT_NE(0, cd_map->AddToMap(name, base_ptr_));
+  }
+}
+
+TEST_F(CdEntryMapTest, FindEntry) {
+  for (auto& cd_map : entry_maps_) {
+    uint64_t expected_offset = header_.size();
+    for (const auto& name : names_) {
+      auto [status, offset] = cd_map->GetCdEntryOffset(name, base_ptr_);
+      ASSERT_EQ(status, kSuccess);
+      ASSERT_EQ(offset, expected_offset);
+      expected_offset += name.size() + separator_.size();
+    }
+  }
+}
+
+TEST_F(CdEntryMapTest, Iteration) {
+  std::set<std::string_view> expected(names_.begin(), names_.end());
+  for (auto& cd_map : entry_maps_) {
+    cd_map->ResetIteration();
+    std::set<std::string_view> entry_set;
+    auto ret = cd_map->Next(base_ptr_);
+    while (ret != std::pair<std::string_view, uint64_t>{}) {
+      auto [it, insert_status] = entry_set.insert(ret.first);
+      ASSERT_TRUE(insert_status);
+      ret = cd_map->Next(base_ptr_);
+    }
+    ASSERT_EQ(expected, entry_set);
+  }
+}
+
 TEST(ziparchive, Open) {
   ZipArchiveHandle handle;
   ASSERT_EQ(0, OpenArchiveWrapper(kValidZip, &handle));