Merge "Implement the functions to parse zip64 structs"
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 44c47f3..9b6213a 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -67,5 +67,11 @@
{
"name": "ziparchive-tests"
}
+ ],
+
+ "postsubmit": [
+ {
+ "name": "ziparchive_tests_large"
+ }
]
}
diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp
index 4081b21..786e7b3 100644
--- a/libziparchive/Android.bp
+++ b/libziparchive/Android.bp
@@ -212,3 +212,20 @@
data: ["cli-tests/**/*"],
target_required: ["cli-test", "ziptool"],
}
+
+python_test_host {
+ name: "ziparchive_tests_large",
+ srcs: ["test_ziparchive_large.py"],
+ main: "test_ziparchive_large.py",
+ version: {
+ py2: {
+ enabled: true,
+ embedded_launcher: false,
+ },
+ py3: {
+ enabled: false,
+ embedded_launcher: false,
+ },
+ },
+ test_suites: ["general-tests"],
+}
diff --git a/libziparchive/test_ziparchive_large.py b/libziparchive/test_ziparchive_large.py
new file mode 100644
index 0000000..c29c37e
--- /dev/null
+++ b/libziparchive/test_ziparchive_large.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Unittests for parsing files in zip64 format"""
+
+import os
+import subprocess
+import tempfile
+import unittest
+import zipfile
+import time
+
+class Zip64Test(unittest.TestCase):
+ @staticmethod
+ def _AddEntriesToZip(output_zip, entries_dict=None):
+ for name, size in entries_dict.items():
+ contents = name[0] * 1024
+ file_path = tempfile.NamedTemporaryFile()
+ with open(file_path.name, 'w') as f:
+ for it in range(0, size):
+ f.write(contents)
+ output_zip.write(file_path.name, arcname = name)
+
+ def _getEntryNames(self, zip_name):
+ cmd = ['ziptool', 'zipinfo', '-1', zip_name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ output, _ = proc.communicate()
+ self.assertEquals(0, proc.returncode)
+ self.assertNotEqual(None, output)
+ return output.split()
+
+ def _ExtractEntries(self, zip_name):
+ temp_dir = tempfile.mkdtemp()
+ cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc.communicate()
+ self.assertEquals(0, proc.returncode)
+
+ def test_entriesSmallerThan2G(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
+ # than 4GiB in size.
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
+ 'd.txt': 1025 * 1024, 'e.txt': 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+ def test_largeNumberOfEntries(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ entry_dict = {}
+ # Add 100k entries (more than 65535|UINT16_MAX).
+ for num in range(0, 100 * 1024):
+ entry_dict[str(num)] = 50
+
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+ def test_largeCompressedEntries(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
+ allowZip64=True) as output_zip:
+ # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
+ # sizes in the extra field. Test if our ziptool should be able to parse it.
+ entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+if __name__ == '__main__':
+ testsuite = unittest.TestLoader().discover(
+ os.path.dirname(os.path.realpath(__file__)))
+ unittest.TextTestRunner(verbosity=2).run(testsuite)
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 6b6502b..9812026 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -139,9 +139,60 @@
uint64_t cd_start_offset;
};
-static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
- ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
- return kInvalidFile;
+static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
+ off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
+ if (eocdOffset <= sizeof(Zip64EocdLocator)) {
+ ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
+ return kInvalidFile;
+ }
+ // We expect to find the zip64 eocd locator immediately before the zip eocd.
+ const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
+ Zip64EocdLocator zip64EocdLocator{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
+ sizeof(Zip64EocdLocator), locatorOffset)) {
+ ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
+ locatorOffset);
+ return kInvalidFile;
+ }
+
+ const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
+ if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
+ ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ Zip64EocdRecord zip64EocdRecord{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
+ sizeof(Zip64EocdRecord), zip64EocdOffset)) {
+ ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), static_cast<int64_t>(zip64EocdOffset), debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
+ zip64EocdOffset);
+ return kInvalidFile;
+ }
+
+ if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
+ ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
+ ", zip64 eocd offset %" PRIu64,
+ debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ *cdInfo = {.num_records = zip64EocdRecord.num_records,
+ .cd_size = zip64EocdRecord.cd_size,
+ .cd_start_offset = zip64EocdRecord.cd_start_offset};
+
+ return kSuccess;
}
static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
@@ -195,7 +246,7 @@
if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
eocd->cd_size, eocd->cd_start_offset);
- return FindCentralDirectoryInfoForZip64(cdInfo);
+ return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
}
/*
@@ -291,13 +342,104 @@
return kSuccess;
}
+static ZipError ParseZip64ExtendedInfoInExtraField(
+ const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
+ uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
+ Zip64ExtendedInfo* zip64Info) {
+ if (extraFieldLength <= 4) {
+ ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
+ extraFieldLength);
+ return kInvalidFile;
+ }
+
+ // Each header MUST consist of:
+ // Header ID - 2 bytes
+ // Data Size - 2 bytes
+ uint16_t offset = 0;
+ while (offset < extraFieldLength - 4) {
+ auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
+ auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
+
+ offset += 4;
+ if (dataSize > extraFieldLength - offset) {
+ ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
+ return kInvalidOffset;
+ }
+
+ // Skip the other types of extensible data fields. Details in
+ // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
+ if (headerId != Zip64ExtendedInfo::kHeaderId) {
+ offset += dataSize;
+ continue;
+ }
+
+ uint16_t expectedDataSize = 0;
+ // We expect the extended field to include both uncompressed and compressed size.
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ expectedDataSize += 16;
+ }
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ expectedDataSize += 8;
+ }
+
+ if (expectedDataSize == 0) {
+ ALOGW("Zip: Data size should not be 0 in zip64 extended field");
+ return kInvalidFile;
+ }
+
+ if (dataSize != expectedDataSize) {
+ auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
+ ? std::to_string(zip32LocalFileHeaderOffset.value())
+ : "missing";
+ ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
+ ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
+ expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
+ localOffsetString.c_str());
+ return kInvalidFile;
+ }
+
+ std::optional<uint64_t> uncompressedFileSize;
+ std::optional<uint64_t> compressedFileSize;
+ std::optional<uint64_t> localHeaderOffset;
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
+ compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
+ offset += 16;
+
+ // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
+ // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
+ // bytes counter to replace stream.total_out.
+ if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
+ ALOGW(
+ "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
+ ", compressed size %" PRIu64,
+ uncompressedFileSize.value(), compressedFileSize.value());
+ return kInvalidFile;
+ }
+ }
+
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
+ offset += 8;
+ }
+
+ zip64Info->uncompressed_file_size = uncompressedFileSize;
+ zip64Info->compressed_file_size = compressedFileSize;
+ zip64Info->local_header_offset = localHeaderOffset;
+ return kSuccess;
+ }
+
+ ALOGW("Zip: zip64 extended info isn't found in the extra field.");
+ return kInvalidFile;
+}
+
/*
* Parses the Zip archive's Central Directory. Allocates and populates the
* hash table.
*
* Returns 0 on success.
*/
-static int32_t ParseZipArchive(ZipArchive* archive) {
+static ZipError ParseZipArchive(ZipArchive* archive) {
const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
const size_t cd_length = archive->central_directory.GetMapLength();
const uint64_t num_entries = archive->num_entries;
@@ -327,7 +469,7 @@
return kInvalidFile;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
return kInvalidFile;
@@ -355,9 +497,15 @@
off64_t local_header_offset = cdr->local_file_header_offset;
if (local_header_offset == UINT32_MAX) {
- // TODO(xunchang) parse the zip64 eocd
- ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
- return kInvalidFile;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
}
if (local_header_offset >= archive->directory_offset) {
@@ -405,7 +553,7 @@
ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
- return 0;
+ return kSuccess;
}
static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@@ -522,7 +670,7 @@
return kInvalidOffset;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
// The offset of the start of the central directory in the zipfile.
// We keep this lying around so that we can sanity check all our lengths
@@ -546,8 +694,27 @@
// the extra field.
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
cdr->local_file_header_offset == UINT32_MAX) {
- ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
- return kInvalidFile;
+ const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
+ data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
+ data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
+ }
+
+ if (local_header_offset == UINT32_MAX) {
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
+ }
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -562,14 +729,68 @@
return kIoError;
}
- const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
-
+ auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
if (lfh->lfh_signature != LocalFileHeader::kSignature) {
ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
static_cast<int64_t>(local_header_offset));
return kInvalidOffset;
}
+ // Check that the local file header name matches the declared name in the central directory.
+ CHECK_LE(entryName.size(), UINT16_MAX);
+ auto nameLen = static_cast<uint16_t>(entryName.size());
+ if (lfh->file_name_length != nameLen) {
+ ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
+ std::string(entryName).c_str(), lfh->file_name_length, nameLen);
+ return kInconsistentInformation;
+ }
+ const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
+ if (name_offset > cd_offset - lfh->file_name_length) {
+ ALOGW("Zip: lfh name has invalid declared length");
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> name_buf(nameLen);
+ if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
+ ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
+ return kIoError;
+ }
+ if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
+ ALOGW("Zip: lfh name did not match central directory");
+ return kInconsistentInformation;
+ }
+
+ uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
+ uint64_t lfh_compressed_size = lfh->compressed_size;
+ if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
+ const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
+ const uint16_t lfh_extra_field_size = lfh->extra_field_length;
+ if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
+ ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
+ if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
+ lfh_extra_field_offset)) {
+ ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
+ return kIoError;
+ }
+
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
+ lfh->compressed_size, std::nullopt, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
+ lfh_compressed_size = zip64_info.compressed_file_size.value();
+ }
+
// Paranoia: Match the values specified in the local file header
// to those specified in the central directory.
@@ -595,12 +816,12 @@
// header agree on the crc, compressed, and uncompressed sizes of the entry.
if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
data->has_data_descriptor = 0;
- if (data->compressed_length != lfh->compressed_size ||
- data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
+ if (data->compressed_length != lfh_compressed_size ||
+ data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
- "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
- data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
- lfh->uncompressed_size, lfh->crc32);
+ "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
+ data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
+ lfh_uncompressed_size, lfh->crc32);
return kInconsistentInformation;
}
} else {
@@ -623,30 +844,6 @@
// Currently only needed to implement zipinfo.
data->is_text = (cdr->internal_file_attributes & 1);
- // Check that the local file header name matches the declared
- // name in the central directory.
- CHECK_LE(entryName.size(), UINT16_MAX);
- auto nameLen = static_cast<uint16_t>(entryName.size());
- if (lfh->file_name_length != nameLen) {
- ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
- std::string(entryName).c_str(), lfh->file_name_length, nameLen);
- return kInconsistentInformation;
- }
- const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
- if (name_offset + lfh->file_name_length > cd_offset) {
- ALOGW("Zip: lfh name has invalid declared length");
- return kInvalidOffset;
- }
- std::vector<uint8_t> name_buf(nameLen);
- if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
- ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
- return kIoError;
- }
- if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
- ALOGW("Zip: lfh name did not match central directory");
- return kInconsistentInformation;
- }
-
const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
lfh->file_name_length + lfh->extra_field_length;
if (data_offset > cd_offset) {
diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc
index 6c1a9a1..fbabf96 100644
--- a/libziparchive/zip_archive_test.cc
+++ b/libziparchive/zip_archive_test.cc
@@ -14,8 +14,6 @@
* limitations under the License.
*/
-#include "zip_archive_private.h"
-
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
@@ -23,6 +21,7 @@
#include <string.h>
#include <unistd.h>
+#include <map>
#include <memory>
#include <set>
#include <string_view>
@@ -31,12 +30,16 @@
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/mapped_file.h>
+#include <android-base/memory.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
#include <ziparchive/zip_archive.h>
#include <ziparchive/zip_archive_stream_entry.h>
+#include "zip_archive_common.h"
+#include "zip_archive_private.h"
+
static std::string test_data_dir = android::base::GetExecutableDirectory() + "/testdata";
static const std::string kValidZip = "valid.zip";
@@ -966,3 +969,290 @@
ASSERT_EQ(0u, writer.GetOutput().size());
}
}
+
+// The class constructs a zipfile with zip64 format, and test the parsing logic.
+class Zip64ParseTest : public ::testing::Test {
+ protected:
+ struct LocalFileEntry {
+ std::vector<uint8_t> local_file_header;
+ std::string file_name;
+ std::vector<uint8_t> extended_field;
+ // Fake data to mimic the compressed bytes in the zipfile.
+ std::vector<uint8_t> compressed_bytes;
+
+ size_t GetSize() const {
+ return local_file_header.size() + file_name.size() + extended_field.size() +
+ compressed_bytes.size();
+ }
+
+ void CopyToOutput(std::vector<uint8_t>* output) const {
+ std::copy(local_file_header.begin(), local_file_header.end(), std::back_inserter(*output));
+ std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
+ std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
+ std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output));
+ }
+ };
+
+ struct CdRecordEntry {
+ std::vector<uint8_t> central_directory_record;
+ std::string file_name;
+ std::vector<uint8_t> extended_field;
+
+ size_t GetSize() const {
+ return central_directory_record.size() + file_name.size() + extended_field.size();
+ }
+
+ void CopyToOutput(std::vector<uint8_t>* output) const {
+ std::copy(central_directory_record.begin(), central_directory_record.end(),
+ std::back_inserter(*output));
+ std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
+ std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
+ }
+ };
+
+ static void ConstructLocalFileHeader(const std::string& name, std::vector<uint8_t>* output,
+ uint32_t uncompressed_size, uint32_t compressed_size) {
+ LocalFileHeader lfh = {};
+ lfh.lfh_signature = LocalFileHeader::kSignature;
+ lfh.compressed_size = compressed_size;
+ lfh.uncompressed_size = uncompressed_size;
+ lfh.file_name_length = static_cast<uint16_t>(name.size());
+ lfh.extra_field_length = 20;
+ *output = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&lfh),
+ reinterpret_cast<uint8_t*>(&lfh) + sizeof(LocalFileHeader));
+ }
+
+ // Put one zip64 extended info in the extended field.
+ static void ConstructExtendedField(const std::vector<uint64_t>& zip64_fields,
+ std::vector<uint8_t>* output) {
+ ASSERT_FALSE(zip64_fields.empty());
+ uint16_t data_size = 8 * static_cast<uint16_t>(zip64_fields.size());
+ std::vector<uint8_t> extended_field(data_size + 4);
+ android::base::put_unaligned(extended_field.data(), Zip64ExtendedInfo::kHeaderId);
+ android::base::put_unaligned(extended_field.data() + 2, data_size);
+ size_t offset = 4;
+ for (const auto& field : zip64_fields) {
+ android::base::put_unaligned(extended_field.data() + offset, field);
+ offset += 8;
+ }
+
+ *output = std::move(extended_field);
+ }
+
+ static void ConstructCentralDirectoryRecord(const std::string& name, uint32_t uncompressed_size,
+ uint32_t compressed_size, uint32_t local_offset,
+ std::vector<uint8_t>* output) {
+ CentralDirectoryRecord cdr = {};
+ cdr.record_signature = CentralDirectoryRecord::kSignature;
+ cdr.compressed_size = uncompressed_size;
+ cdr.uncompressed_size = compressed_size;
+ cdr.file_name_length = static_cast<uint16_t>(name.size());
+ cdr.extra_field_length = local_offset == UINT32_MAX ? 28 : 20;
+ cdr.local_file_header_offset = local_offset;
+ *output =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&cdr),
+ reinterpret_cast<uint8_t*>(&cdr) + sizeof(CentralDirectoryRecord));
+ }
+
+ // Add an entry to the zipfile, construct the corresponding local header and cd entry.
+ void AddEntry(const std::string& name, const std::vector<uint8_t>& content,
+ bool uncompressed_size_in_extended, bool compressed_size_in_extended,
+ bool local_offset_in_extended) {
+ auto uncompressed_size = static_cast<uint32_t>(content.size());
+ auto compressed_size = static_cast<uint32_t>(content.size());
+ uint32_t local_file_header_offset = 0;
+ std::for_each(file_entries_.begin(), file_entries_.end(),
+ [&local_file_header_offset](const LocalFileEntry& file_entry) {
+ local_file_header_offset += file_entry.GetSize();
+ });
+
+ std::vector<uint64_t> zip64_fields;
+ if (uncompressed_size_in_extended) {
+ zip64_fields.push_back(uncompressed_size);
+ uncompressed_size = UINT32_MAX;
+ }
+ if (compressed_size_in_extended) {
+ zip64_fields.push_back(compressed_size);
+ compressed_size = UINT32_MAX;
+ }
+ LocalFileEntry local_entry = {
+ .local_file_header = {},
+ .file_name = name,
+ .extended_field = {},
+ .compressed_bytes = content,
+ };
+ ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size,
+ compressed_size);
+ ConstructExtendedField(zip64_fields, &local_entry.extended_field);
+ file_entries_.push_back(std::move(local_entry));
+
+ if (local_offset_in_extended) {
+ zip64_fields.push_back(local_file_header_offset);
+ local_file_header_offset = UINT32_MAX;
+ }
+ CdRecordEntry cd_entry = {
+ .central_directory_record = {},
+ .file_name = name,
+ .extended_field = {},
+ };
+ ConstructCentralDirectoryRecord(name, uncompressed_size, compressed_size,
+ local_file_header_offset, &cd_entry.central_directory_record);
+ ConstructExtendedField(zip64_fields, &cd_entry.extended_field);
+ cd_entries_.push_back(std::move(cd_entry));
+ }
+
+ void ConstructEocd() {
+ ASSERT_EQ(file_entries_.size(), cd_entries_.size());
+ Zip64EocdRecord zip64_eocd = {};
+ zip64_eocd.record_signature = Zip64EocdRecord::kSignature;
+ zip64_eocd.num_records = file_entries_.size();
+ zip64_eocd.cd_size = 0;
+ std::for_each(
+ cd_entries_.begin(), cd_entries_.end(),
+ [&zip64_eocd](const CdRecordEntry& cd_entry) { zip64_eocd.cd_size += cd_entry.GetSize(); });
+ zip64_eocd.cd_start_offset = 0;
+ std::for_each(file_entries_.begin(), file_entries_.end(),
+ [&zip64_eocd](const LocalFileEntry& file_entry) {
+ zip64_eocd.cd_start_offset += file_entry.GetSize();
+ });
+ zip64_eocd_record_ =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_eocd),
+ reinterpret_cast<uint8_t*>(&zip64_eocd) + sizeof(Zip64EocdRecord));
+
+ Zip64EocdLocator zip64_locator = {};
+ zip64_locator.locator_signature = Zip64EocdLocator::kSignature;
+ zip64_locator.zip64_eocd_offset = zip64_eocd.cd_start_offset + zip64_eocd.cd_size;
+ zip64_eocd_locator_ =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_locator),
+ reinterpret_cast<uint8_t*>(&zip64_locator) + sizeof(Zip64EocdLocator));
+
+ EocdRecord eocd = {};
+ eocd.eocd_signature = EocdRecord::kSignature,
+ eocd.num_records = file_entries_.size() > UINT16_MAX
+ ? UINT16_MAX
+ : static_cast<uint16_t>(file_entries_.size());
+ eocd.cd_size = UINT32_MAX;
+ eocd.cd_start_offset = UINT32_MAX;
+ eocd_record_ = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&eocd),
+ reinterpret_cast<uint8_t*>(&eocd) + sizeof(EocdRecord));
+ }
+
+ // Concatenate all the local file entries, cd entries, and eocd metadata.
+ void ConstructZipFile() {
+ for (const auto& file_entry : file_entries_) {
+ file_entry.CopyToOutput(&zip_content_);
+ }
+ for (const auto& cd_entry : cd_entries_) {
+ cd_entry.CopyToOutput(&zip_content_);
+ }
+ std::copy(zip64_eocd_record_.begin(), zip64_eocd_record_.end(),
+ std::back_inserter(zip_content_));
+ std::copy(zip64_eocd_locator_.begin(), zip64_eocd_locator_.end(),
+ std::back_inserter(zip_content_));
+ std::copy(eocd_record_.begin(), eocd_record_.end(), std::back_inserter(zip_content_));
+ }
+
+ std::vector<uint8_t> zip_content_;
+
+ std::vector<LocalFileEntry> file_entries_;
+ std::vector<CdRecordEntry> cd_entries_;
+ std::vector<uint8_t> zip64_eocd_record_;
+ std::vector<uint8_t> zip64_eocd_locator_;
+ std::vector<uint8_t> eocd_record_;
+};
+
+TEST_F(Zip64ParseTest, openFile) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFilelocalOffsetInExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, true);
+ AddEntry("b.txt", std::vector<uint8_t>(200, 'b'), true, true, true);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFileCompressedNotInExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, false, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ // Zip64 extended fields must include both uncompressed and compressed size.
+ ASSERT_NE(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, findEntry) {
+ AddEntry("a.txt", std::vector<uint8_t>(200, 'a'), true, true, true);
+ AddEntry("b.txt", std::vector<uint8_t>(300, 'b'), true, true, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry));
+ ASSERT_EQ(200, entry.uncompressed_length);
+ ASSERT_EQ(200, entry.compressed_length);
+
+ ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry));
+ ASSERT_EQ(300, entry.uncompressed_length);
+ ASSERT_EQ(300, entry.compressed_length);
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFileIncorrectDataSizeInLocalExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, false);
+ ASSERT_EQ(1, file_entries_.size());
+ auto& extended_field = file_entries_[0].extended_field;
+ // data size exceeds the extended field size in local header.
+ android::base::put_unaligned<uint16_t>(extended_field.data() + 2, 30);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_NE(0, FindEntry(handle, "a.txt", &entry));
+
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, iterates) {
+ std::set<std::string_view> names{"a.txt", "b.txt", "c.txt", "d.txt", "e.txt"};
+ for (const auto& name : names) {
+ AddEntry(std::string(name), std::vector<uint8_t>(100, name[0]), true, true, true);
+ }
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+
+ void* iteration_cookie;
+ ASSERT_EQ(0, StartIteration(handle, &iteration_cookie));
+ std::set<std::string_view> result;
+ std::string_view name;
+ ZipEntry entry;
+ while (Next(iteration_cookie, &entry, &name) == 0) result.emplace(name);
+ ASSERT_EQ(names, result);
+
+ CloseArchive(handle);
+}