Add std::map implementation for cd entry map
Add the std::map implementation to be used later in zip64 format.
Also move the entry map classes to a separate file to make the hierarchy
clear.
Test: unittests pass
Change-Id: I74d95f53207cc8ca871b955e2a15c184d5497833
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 4451507..34a9c54 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -107,15 +107,15 @@
}
// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
-std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
- const uint8_t* start) const {
+std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
+ const uint8_t* start) const {
const uint32_t hash = ComputeHash(name);
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
uint32_t ent = hash & (hash_table_size_ - 1);
while (hash_table_[ent].name_offset != 0) {
if (hash_table_[ent].ToStringView(start) == name) {
- return {0, hash_table_[ent].name_offset};
+ return {kSuccess, hash_table_[ent].name_offset};
}
ent = (ent + 1) & (hash_table_size_ - 1);
}
@@ -124,7 +124,7 @@
return {kEntryNotFound, 0};
}
-int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
+ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
const uint64_t hash = ComputeHash(name);
uint32_t ent = hash & (hash_table_size_ - 1);
@@ -145,7 +145,7 @@
const char* start_char = reinterpret_cast<const char*>(start);
hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
- return 0;
+ return kSuccess;
}
void CdEntryMapZip32::ResetIteration() {
@@ -166,6 +166,11 @@
}
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
+ /*
+ * Create hash table. We have a minimum 75% load factor, possibly as
+ * low as 50% after we round off to a power of 2. There must be at
+ * least one unused entry to avoid an infinite loop during creation.
+ */
hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
hash_table_ = {
reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
@@ -179,6 +184,43 @@
return std::unique_ptr<CdEntryMapInterface>(entry_map);
}
+std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
+ return std::unique_ptr<CdEntryMapInterface>(new CdEntryMapZip64());
+}
+
+ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
+ const auto [it, added] =
+ entry_table_.insert({name, name.data() - reinterpret_cast<const char*>(start)});
+ if (!added) {
+ ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
+ return kDuplicateEntry;
+ }
+ return kSuccess;
+}
+
+std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
+ const uint8_t* /*cd_start*/) const {
+ const auto it = entry_table_.find(name);
+ if (it == entry_table_.end()) {
+ ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
+ return {kEntryNotFound, 0};
+ }
+
+ return {kSuccess, it->second};
+}
+
+void CdEntryMapZip64::ResetIteration() {
+ iterator_ = entry_table_.begin();
+}
+
+std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
+ if (iterator_ == entry_table_.end()) {
+ return {};
+ }
+
+ return *iterator_++;
+}
+
#if defined(__BIONIC__)
uint64_t GetOwnerTag(const ZipArchive* archive) {
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
@@ -357,12 +399,12 @@
const size_t cd_length = archive->central_directory.GetMapLength();
const uint16_t num_entries = archive->num_entries;
- /*
- * Create hash table. We have a minimum 75% load factor, possibly as
- * low as 50% after we round off to a power of 2. There must be at
- * least one unused entry to avoid an infinite loop during creation.
- */
- archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+ // TODO(xunchang) parse the zip64 Eocd
+ if (num_entries > UINT16_MAX) {
+ archive->cd_entry_map = CdEntryMapZip64::Create();
+ } else {
+ archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+ }
if (archive->cd_entry_map == nullptr) {
return kAllocationFailed;
}
diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h
index 68977f6..ecb9f22 100644
--- a/libziparchive/zip_archive_private.h
+++ b/libziparchive/zip_archive_private.h
@@ -22,6 +22,7 @@
#include <stdlib.h>
#include <unistd.h>
+#include <map>
#include <memory>
#include <utility>
#include <vector>
@@ -46,7 +47,9 @@
"Allocation failed",
};
-enum ErrorCodes : int32_t {
+enum ZipError : int32_t {
+ kSuccess = 0,
+
kIterationEnd = -1,
// We encountered a Zlib error when inflating a stream from this file.
@@ -149,12 +152,12 @@
// Adds an entry to the map. The |name| should internally points to the
// filename field of a cd entry. And |start| points to the beginning of the
// central directory. Returns 0 on success.
- virtual int32_t AddToMap(std::string_view name, const uint8_t* start) = 0;
+ virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
// For the zip entry |entryName|, finds the offset of its filename field in
// the central directory. Returns a pair of [status, offset]. The value of
// the status is 0 on success.
- virtual std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
- const uint8_t* cd_start) const = 0;
+ virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+ const uint8_t* cd_start) const = 0;
// Resets the iterator to the beginning of the map.
virtual void ResetIteration() = 0;
// Returns the [name, cd offset] of the current element. Also increments the
@@ -190,9 +193,9 @@
public:
static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
- int32_t AddToMap(std::string_view name, const uint8_t* start) override;
- std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
- const uint8_t* cd_start) const override;
+ ZipError AddToMap(std::string_view name, const uint8_t* start) override;
+ std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+ const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
@@ -210,6 +213,25 @@
uint32_t current_position_{0};
};
+// This implementation of CdEntryMap uses a std::map
+class CdEntryMapZip64 : public CdEntryMapInterface {
+ public:
+ static std::unique_ptr<CdEntryMapInterface> Create();
+
+ ZipError AddToMap(std::string_view name, const uint8_t* start) override;
+ std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
+ const uint8_t* cd_start) const override;
+ void ResetIteration() override;
+ std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
+
+ private:
+ CdEntryMapZip64() = default;
+
+ std::map<std::string_view, uint64_t> entry_table_;
+
+ std::map<std::string_view, uint64_t>::iterator iterator_;
+};
+
struct ZipArchive {
// open Zip archive
mutable MappedZipFile mapped_zip;
diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc
index 0916304..523d4c5 100644
--- a/libziparchive/zip_archive_test.cc
+++ b/libziparchive/zip_archive_test.cc
@@ -24,11 +24,14 @@
#include <unistd.h>
#include <memory>
+#include <set>
+#include <string_view>
#include <vector>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/mapped_file.h>
+#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
#include <ziparchive/zip_archive.h>
@@ -53,6 +56,76 @@
return OpenArchive(abs_path.c_str(), handle);
}
+class CdEntryMapTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ names_ = {
+ "a.txt", "b.txt", "b/", "b/c.txt", "b/d.txt",
+ };
+ separator_ = "separator";
+ header_ = "metadata";
+ joined_names_ = header_ + android::base::Join(names_, separator_);
+ base_ptr_ = reinterpret_cast<uint8_t*>(&joined_names_[0]);
+
+ entry_maps_.emplace_back(CdEntryMapZip32::Create(static_cast<uint16_t>(names_.size())));
+ entry_maps_.emplace_back(CdEntryMapZip64::Create());
+ for (auto& cd_map : entry_maps_) {
+ ASSERT_NE(nullptr, cd_map);
+ size_t offset = header_.size();
+ for (const auto& name : names_) {
+ auto status = cd_map->AddToMap(
+ std::string_view{joined_names_.c_str() + offset, name.size()}, base_ptr_);
+ ASSERT_EQ(0, status);
+ offset += name.size() + separator_.size();
+ }
+ }
+ }
+
+ std::vector<std::string> names_;
+ // A continuous region of memory serves as a mock of the central directory.
+ std::string joined_names_;
+ // We expect some metadata at the beginning of the central directory and between filenames.
+ std::string header_;
+ std::string separator_;
+
+ std::vector<std::unique_ptr<CdEntryMapInterface>> entry_maps_;
+ uint8_t* base_ptr_{nullptr}; // Points to the start of the central directory.
+};
+
+TEST_F(CdEntryMapTest, AddDuplicatedEntry) {
+ for (auto& cd_map : entry_maps_) {
+ std::string_view name = "b.txt";
+ ASSERT_NE(0, cd_map->AddToMap(name, base_ptr_));
+ }
+}
+
+TEST_F(CdEntryMapTest, FindEntry) {
+ for (auto& cd_map : entry_maps_) {
+ uint64_t expected_offset = header_.size();
+ for (const auto& name : names_) {
+ auto [status, offset] = cd_map->GetCdEntryOffset(name, base_ptr_);
+ ASSERT_EQ(status, kSuccess);
+ ASSERT_EQ(offset, expected_offset);
+ expected_offset += name.size() + separator_.size();
+ }
+ }
+}
+
+TEST_F(CdEntryMapTest, Iteration) {
+ std::set<std::string_view> expected(names_.begin(), names_.end());
+ for (auto& cd_map : entry_maps_) {
+ cd_map->ResetIteration();
+ std::set<std::string_view> entry_set;
+ auto ret = cd_map->Next(base_ptr_);
+ while (ret != std::pair<std::string_view, uint64_t>{}) {
+ auto [it, insert_status] = entry_set.insert(ret.first);
+ ASSERT_TRUE(insert_status);
+ ret = cd_map->Next(base_ptr_);
+ }
+ ASSERT_EQ(expected, entry_set);
+ }
+}
+
TEST(ziparchive, Open) {
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveWrapper(kValidZip, &handle));