Create an interface for the cd entry hash table
The current implementation of the hashtable uses less memory than
a std::map. As most of the zip files we encountered don't use the zip64
extension, we should keep the current implementation. And the interface
adds the flexibility for us to switch to std::map for zip64 format.
Bug: 150900468
Test: unit tests pass
Change-Id: Ifd008785c9ff416a27049f9e0c54d9eef985bd85
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 68837cc..4451507 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -106,55 +106,79 @@
return static_cast<uint32_t>(std::hash<std::string_view>{}(name));
}
-/*
- * Convert a ZipEntry to a hash table index, verifying that it's in a
- * valid range.
- */
-static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t hash_table_size,
- std::string_view name, const uint8_t* start) {
+// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
+std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
+ const uint8_t* start) const {
const uint32_t hash = ComputeHash(name);
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
- uint32_t ent = hash & (hash_table_size - 1);
- while (hash_table[ent].name_offset != 0) {
- if (hash_table[ent].ToStringView(start) == name) {
- return ent;
+ uint32_t ent = hash & (hash_table_size_ - 1);
+ while (hash_table_[ent].name_offset != 0) {
+ if (hash_table_[ent].ToStringView(start) == name) {
+ return {0, hash_table_[ent].name_offset};
}
- ent = (ent + 1) & (hash_table_size - 1);
+ ent = (ent + 1) & (hash_table_size_ - 1);
}
ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
- return kEntryNotFound;
+ return {kEntryNotFound, 0};
}
-/*
- * Add a new entry to the hash table.
- */
-static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_size,
- std::string_view name, const uint8_t* start) {
+int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
const uint64_t hash = ComputeHash(name);
- uint32_t ent = hash & (hash_table_size - 1);
+ uint32_t ent = hash & (hash_table_size_ - 1);
/*
* We over-allocated the table, so we're guaranteed to find an empty slot.
* Further, we guarantee that the hashtable size is not 0.
*/
- while (hash_table[ent].name_offset != 0) {
- if (hash_table[ent].ToStringView(start) == name) {
+ while (hash_table_[ent].name_offset != 0) {
+ if (hash_table_[ent].ToStringView(start) == name) {
// We've found a duplicate entry. We don't accept duplicates.
ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
return kDuplicateEntry;
}
- ent = (ent + 1) & (hash_table_size - 1);
+ ent = (ent + 1) & (hash_table_size_ - 1);
}
// `name` has already been validated before entry.
const char* start_char = reinterpret_cast<const char*>(start);
- hash_table[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
- hash_table[ent].name_length = static_cast<uint16_t>(name.size());
+ hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
+ hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
return 0;
}
+void CdEntryMapZip32::ResetIteration() {
+ current_position_ = 0;
+}
+
+std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
+ while (current_position_ < hash_table_size_) {
+ const auto& entry = hash_table_[current_position_];
+ current_position_ += 1;
+
+ if (entry.name_offset != 0) {
+ return {entry.ToStringView(cd_start), entry.name_offset};
+ }
+ }
+ // We have reached the end of the hash table.
+ return {};
+}
+
+CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
+ hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
+ hash_table_ = {
+ reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
+}
+
+std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
+ auto entry_map = new CdEntryMapZip32(num_entries);
+ CHECK(entry_map->hash_table_ != nullptr)
+ << "Zip: unable to allocate the " << entry_map->hash_table_size_
+ << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
+ return std::unique_ptr<CdEntryMapInterface>(entry_map);
+}
+
#if defined(__BIONIC__)
uint64_t GetOwnerTag(const ZipArchive* archive) {
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
@@ -168,9 +192,7 @@
directory_offset(0),
central_directory(),
directory_map(),
- num_entries(0),
- hash_table_size(0),
- hash_table(nullptr) {
+ num_entries(0) {
#if defined(__BIONIC__)
if (assume_ownership) {
android_fdsan_exchange_owner_tag(fd, 0, GetOwnerTag(this));
@@ -184,9 +206,7 @@
directory_offset(0),
central_directory(),
directory_map(),
- num_entries(0),
- hash_table_size(0),
- hash_table(nullptr) {}
+ num_entries(0) {}
ZipArchive::~ZipArchive() {
if (close_file && mapped_zip.GetFileDescriptor() >= 0) {
@@ -196,8 +216,6 @@
close(mapped_zip.GetFileDescriptor());
#endif
}
-
- free(hash_table);
}
static int32_t MapCentralDirectory0(const char* debug_file_name, ZipArchive* archive,
@@ -344,12 +362,8 @@
* low as 50% after we round off to a power of 2. There must be at
* least one unused entry to avoid an infinite loop during creation.
*/
- archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
- archive->hash_table =
- reinterpret_cast<ZipStringOffset*>(calloc(archive->hash_table_size, sizeof(ZipStringOffset)));
- if (archive->hash_table == nullptr) {
- ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu",
- archive->hash_table_size, sizeof(ZipStringOffset));
+ archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+ if (archive->cd_entry_map == nullptr) {
return kAllocationFailed;
}
@@ -401,9 +415,9 @@
// Add the CDE filename to the hash table.
std::string_view entry_name{reinterpret_cast<const char*>(file_name), file_name_length};
- const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name,
- archive->central_directory.GetBasePtr());
- if (add_result != 0) {
+ if (auto add_result =
+ archive->cd_entry_map->AddToMap(entry_name, archive->central_directory.GetBasePtr());
+ add_result != 0) {
ALOGW("Zip: Error adding entry to hash table %d", add_result);
return add_result;
}
@@ -514,14 +528,13 @@
return 0;
}
-static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* data) {
- const uint16_t nameLen = archive->hash_table[ent].name_length;
-
+static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
+ const uint64_t nameOffset, ZipEntry* data) {
// Recover the start of the central directory entry from the filename
// pointer. The filename is the first entry past the fixed-size data,
// so we can just subtract back from that.
const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
- const uint8_t* ptr = base_ptr + archive->hash_table[ent].name_offset;
+ const uint8_t* ptr = base_ptr + nameOffset;
ptr -= sizeof(CentralDirectoryRecord);
// This is the base of our mmapped region, we have to sanity check that
@@ -627,8 +640,11 @@
// Check that the local file header name matches the declared
// name in the central directory.
+ CHECK_LE(entryName.size(), UINT16_MAX);
+ auto nameLen = static_cast<uint16_t>(entryName.size());
if (lfh->file_name_length != nameLen) {
- ALOGW("Zip: lfh name length did not match central directory");
+ ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
+ std::string(entryName).c_str(), lfh->file_name_length, nameLen);
return kInconsistentInformation;
}
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
@@ -641,9 +657,7 @@
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
return kIoError;
}
- const std::string_view entry_name =
- archive->hash_table[ent].ToStringView(archive->central_directory.GetBasePtr());
- if (memcmp(entry_name.data(), name_buf.data(), nameLen) != 0) {
+ if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
ALOGW("Zip: lfh name did not match central directory");
return kInconsistentInformation;
}
@@ -689,7 +703,7 @@
int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
const std::string_view optional_prefix,
const std::string_view optional_suffix) {
- if (archive == NULL || archive->hash_table == NULL) {
+ if (archive == nullptr || archive->cd_entry_map == nullptr) {
ALOGW("Zip: Invalid ZipArchiveHandle");
return kInvalidHandle;
}
@@ -700,6 +714,7 @@
return kInvalidEntryName;
}
+ archive->cd_entry_map->ResetIteration();
*cookie_ptr = new IterationHandle(archive, optional_prefix, optional_suffix);
return 0;
}
@@ -715,14 +730,14 @@
return kInvalidEntryName;
}
- const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName,
- archive->central_directory.GetBasePtr());
- if (ent < 0) {
+ const auto [result, offset] =
+ archive->cd_entry_map->GetCdEntryOffset(entryName, archive->central_directory.GetBasePtr());
+ if (result != 0) {
ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
- return static_cast<int32_t>(ent); // kEntryNotFound is safe to truncate.
+ return static_cast<int32_t>(result); // kEntryNotFound is safe to truncate.
}
// We know there are at most hash_table_size entries, safe to truncate.
- return FindEntry(archive, static_cast<uint32_t>(ent), data);
+ return FindEntry(archive, entryName, offset, data);
}
int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
@@ -736,35 +751,32 @@
int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
- if (handle == NULL) {
+ if (handle == nullptr) {
ALOGW("Zip: Null ZipArchiveHandle");
return kInvalidHandle;
}
ZipArchive* archive = handle->archive;
- if (archive == NULL || archive->hash_table == NULL) {
+ if (archive == nullptr || archive->cd_entry_map == nullptr) {
ALOGW("Zip: Invalid ZipArchiveHandle");
return kInvalidHandle;
}
- const uint32_t currentOffset = handle->position;
- const uint32_t hash_table_length = archive->hash_table_size;
- const ZipStringOffset* hash_table = archive->hash_table;
- for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
- const std::string_view entry_name =
- hash_table[i].ToStringView(archive->central_directory.GetBasePtr());
- if (hash_table[i].name_offset != 0 && (android::base::StartsWith(entry_name, handle->prefix) &&
- android::base::EndsWith(entry_name, handle->suffix))) {
- handle->position = (i + 1);
- const int error = FindEntry(archive, i, data);
+ auto entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
+ while (entry != std::pair<std::string_view, uint64_t>()) {
+ const auto [entry_name, offset] = entry;
+ if (android::base::StartsWith(entry_name, handle->prefix) &&
+ android::base::EndsWith(entry_name, handle->suffix)) {
+ const int error = FindEntry(archive, entry_name, offset, data);
if (!error && name) {
*name = entry_name;
}
return error;
}
+ entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
}
- handle->position = 0;
+ archive->cd_entry_map->ResetIteration();
return kIterationEnd;
}