Create an interface for the cd entry hash table

The current implementation of the hashtable uses less memory than
a std::map. As most of the zip files we encountered don't use the zip64
extension, we should keep the current implementation. And the interface
adds the flexibility for us to switch to std::map for zip64 format.

Bug: 150900468
Test: unit tests pass
Change-Id: Ifd008785c9ff416a27049f9e0c54d9eef985bd85
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 68837cc..4451507 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -106,55 +106,79 @@
   return static_cast<uint32_t>(std::hash<std::string_view>{}(name));
 }
 
-/*
- * Convert a ZipEntry to a hash table index, verifying that it's in a
- * valid range.
- */
-static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t hash_table_size,
-                            std::string_view name, const uint8_t* start) {
+// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
+std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
+                                                               const uint8_t* start) const {
   const uint32_t hash = ComputeHash(name);
 
   // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
-  uint32_t ent = hash & (hash_table_size - 1);
-  while (hash_table[ent].name_offset != 0) {
-    if (hash_table[ent].ToStringView(start) == name) {
-      return ent;
+  uint32_t ent = hash & (hash_table_size_ - 1);
+  while (hash_table_[ent].name_offset != 0) {
+    if (hash_table_[ent].ToStringView(start) == name) {
+      return {0, hash_table_[ent].name_offset};
     }
-    ent = (ent + 1) & (hash_table_size - 1);
+    ent = (ent + 1) & (hash_table_size_ - 1);
   }
 
   ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
-  return kEntryNotFound;
+  return {kEntryNotFound, 0};
 }
 
-/*
- * Add a new entry to the hash table.
- */
-static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_size,
-                         std::string_view name, const uint8_t* start) {
+int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
   const uint64_t hash = ComputeHash(name);
-  uint32_t ent = hash & (hash_table_size - 1);
+  uint32_t ent = hash & (hash_table_size_ - 1);
 
   /*
    * We over-allocated the table, so we're guaranteed to find an empty slot.
    * Further, we guarantee that the hashtable size is not 0.
    */
-  while (hash_table[ent].name_offset != 0) {
-    if (hash_table[ent].ToStringView(start) == name) {
+  while (hash_table_[ent].name_offset != 0) {
+    if (hash_table_[ent].ToStringView(start) == name) {
       // We've found a duplicate entry. We don't accept duplicates.
       ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
       return kDuplicateEntry;
     }
-    ent = (ent + 1) & (hash_table_size - 1);
+    ent = (ent + 1) & (hash_table_size_ - 1);
   }
 
   // `name` has already been validated before entry.
   const char* start_char = reinterpret_cast<const char*>(start);
-  hash_table[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
-  hash_table[ent].name_length = static_cast<uint16_t>(name.size());
+  hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
+  hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
   return 0;
 }
 
+void CdEntryMapZip32::ResetIteration() {
+  current_position_ = 0;
+}
+
+std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
+  while (current_position_ < hash_table_size_) {
+    const auto& entry = hash_table_[current_position_];
+    current_position_ += 1;
+
+    if (entry.name_offset != 0) {
+      return {entry.ToStringView(cd_start), entry.name_offset};
+    }
+  }
+  // We have reached the end of the hash table.
+  return {};
+}
+
+CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
+  hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
+  hash_table_ = {
+      reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
+}
+
+std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
+  auto entry_map = new CdEntryMapZip32(num_entries);
+  CHECK(entry_map->hash_table_ != nullptr)
+      << "Zip: unable to allocate the " << entry_map->hash_table_size_
+      << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
+  return std::unique_ptr<CdEntryMapInterface>(entry_map);
+}
+
 #if defined(__BIONIC__)
 uint64_t GetOwnerTag(const ZipArchive* archive) {
   return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
@@ -168,9 +192,7 @@
       directory_offset(0),
       central_directory(),
       directory_map(),
-      num_entries(0),
-      hash_table_size(0),
-      hash_table(nullptr) {
+      num_entries(0) {
 #if defined(__BIONIC__)
   if (assume_ownership) {
     android_fdsan_exchange_owner_tag(fd, 0, GetOwnerTag(this));
@@ -184,9 +206,7 @@
       directory_offset(0),
       central_directory(),
       directory_map(),
-      num_entries(0),
-      hash_table_size(0),
-      hash_table(nullptr) {}
+      num_entries(0) {}
 
 ZipArchive::~ZipArchive() {
   if (close_file && mapped_zip.GetFileDescriptor() >= 0) {
@@ -196,8 +216,6 @@
     close(mapped_zip.GetFileDescriptor());
 #endif
   }
-
-  free(hash_table);
 }
 
 static int32_t MapCentralDirectory0(const char* debug_file_name, ZipArchive* archive,
@@ -344,12 +362,8 @@
    * low as 50% after we round off to a power of 2.  There must be at
    * least one unused entry to avoid an infinite loop during creation.
    */
-  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
-  archive->hash_table =
-      reinterpret_cast<ZipStringOffset*>(calloc(archive->hash_table_size, sizeof(ZipStringOffset)));
-  if (archive->hash_table == nullptr) {
-    ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu",
-          archive->hash_table_size, sizeof(ZipStringOffset));
+  archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+  if (archive->cd_entry_map == nullptr) {
     return kAllocationFailed;
   }
 
@@ -401,9 +415,9 @@
 
     // Add the CDE filename to the hash table.
     std::string_view entry_name{reinterpret_cast<const char*>(file_name), file_name_length};
-    const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name,
-                                     archive->central_directory.GetBasePtr());
-    if (add_result != 0) {
+    if (auto add_result =
+            archive->cd_entry_map->AddToMap(entry_name, archive->central_directory.GetBasePtr());
+        add_result != 0) {
       ALOGW("Zip: Error adding entry to hash table %d", add_result);
       return add_result;
     }
@@ -514,14 +528,13 @@
   return 0;
 }
 
-static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* data) {
-  const uint16_t nameLen = archive->hash_table[ent].name_length;
-
+static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
+                         const uint64_t nameOffset, ZipEntry* data) {
   // Recover the start of the central directory entry from the filename
   // pointer.  The filename is the first entry past the fixed-size data,
   // so we can just subtract back from that.
   const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
-  const uint8_t* ptr = base_ptr + archive->hash_table[ent].name_offset;
+  const uint8_t* ptr = base_ptr + nameOffset;
   ptr -= sizeof(CentralDirectoryRecord);
 
   // This is the base of our mmapped region, we have to sanity check that
@@ -627,8 +640,11 @@
 
   // Check that the local file header name matches the declared
   // name in the central directory.
+  CHECK_LE(entryName.size(), UINT16_MAX);
+  auto nameLen = static_cast<uint16_t>(entryName.size());
   if (lfh->file_name_length != nameLen) {
-    ALOGW("Zip: lfh name length did not match central directory");
+    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
+          std::string(entryName).c_str(), lfh->file_name_length, nameLen);
     return kInconsistentInformation;
   }
   const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
@@ -641,9 +657,7 @@
     ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
     return kIoError;
   }
-  const std::string_view entry_name =
-      archive->hash_table[ent].ToStringView(archive->central_directory.GetBasePtr());
-  if (memcmp(entry_name.data(), name_buf.data(), nameLen) != 0) {
+  if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
     ALOGW("Zip: lfh name did not match central directory");
     return kInconsistentInformation;
   }
@@ -689,7 +703,7 @@
 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
                        const std::string_view optional_prefix,
                        const std::string_view optional_suffix) {
-  if (archive == NULL || archive->hash_table == NULL) {
+  if (archive == nullptr || archive->cd_entry_map == nullptr) {
     ALOGW("Zip: Invalid ZipArchiveHandle");
     return kInvalidHandle;
   }
@@ -700,6 +714,7 @@
     return kInvalidEntryName;
   }
 
+  archive->cd_entry_map->ResetIteration();
   *cookie_ptr = new IterationHandle(archive, optional_prefix, optional_suffix);
   return 0;
 }
@@ -715,14 +730,14 @@
     return kInvalidEntryName;
   }
 
-  const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName,
-                                   archive->central_directory.GetBasePtr());
-  if (ent < 0) {
+  const auto [result, offset] =
+      archive->cd_entry_map->GetCdEntryOffset(entryName, archive->central_directory.GetBasePtr());
+  if (result != 0) {
     ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
-    return static_cast<int32_t>(ent);  // kEntryNotFound is safe to truncate.
+    return static_cast<int32_t>(result);  // kEntryNotFound is safe to truncate.
   }
   // We know there are at most hash_table_size entries, safe to truncate.
-  return FindEntry(archive, static_cast<uint32_t>(ent), data);
+  return FindEntry(archive, entryName, offset, data);
 }
 
 int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
@@ -736,35 +751,32 @@
 
 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
   IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
-  if (handle == NULL) {
+  if (handle == nullptr) {
     ALOGW("Zip: Null ZipArchiveHandle");
     return kInvalidHandle;
   }
 
   ZipArchive* archive = handle->archive;
-  if (archive == NULL || archive->hash_table == NULL) {
+  if (archive == nullptr || archive->cd_entry_map == nullptr) {
     ALOGW("Zip: Invalid ZipArchiveHandle");
     return kInvalidHandle;
   }
 
-  const uint32_t currentOffset = handle->position;
-  const uint32_t hash_table_length = archive->hash_table_size;
-  const ZipStringOffset* hash_table = archive->hash_table;
-  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
-    const std::string_view entry_name =
-        hash_table[i].ToStringView(archive->central_directory.GetBasePtr());
-    if (hash_table[i].name_offset != 0 && (android::base::StartsWith(entry_name, handle->prefix) &&
-                                           android::base::EndsWith(entry_name, handle->suffix))) {
-      handle->position = (i + 1);
-      const int error = FindEntry(archive, i, data);
+  auto entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
+  while (entry != std::pair<std::string_view, uint64_t>()) {
+    const auto [entry_name, offset] = entry;
+    if (android::base::StartsWith(entry_name, handle->prefix) &&
+        android::base::EndsWith(entry_name, handle->suffix)) {
+      const int error = FindEntry(archive, entry_name, offset, data);
       if (!error && name) {
         *name = entry_name;
       }
       return error;
     }
+    entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
   }
 
-  handle->position = 0;
+  archive->cd_entry_map->ResetIteration();
   return kIterationEnd;
 }