Implement the functions to parse zip64 structs
Implement the logic to parse zip64 eocd and zip64 extended info
in the extra field. Also add unit tests and python tests which
create packages larger than 4GiB.
The extraction of zip entry size > 4GiB will be supported in the follow
ups.
Bug: 150900468
Test: unit tests pass
Change-Id: I4cd9ebbd9709b3d2f9cd293625d2c79024bb45a5
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 44c47f3..9b6213a 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -67,5 +67,11 @@
{
"name": "ziparchive-tests"
}
+ ],
+
+ "postsubmit": [
+ {
+ "name": "ziparchive_tests_large"
+ }
]
}
diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp
index 4081b21..786e7b3 100644
--- a/libziparchive/Android.bp
+++ b/libziparchive/Android.bp
@@ -212,3 +212,20 @@
data: ["cli-tests/**/*"],
target_required: ["cli-test", "ziptool"],
}
+
+python_test_host {
+ name: "ziparchive_tests_large",
+ srcs: ["test_ziparchive_large.py"],
+ main: "test_ziparchive_large.py",
+ version: {
+ py2: {
+ enabled: true,
+ embedded_launcher: false,
+ },
+ py3: {
+ enabled: false,
+ embedded_launcher: false,
+ },
+ },
+ test_suites: ["general-tests"],
+}
diff --git a/libziparchive/test_ziparchive_large.py b/libziparchive/test_ziparchive_large.py
new file mode 100644
index 0000000..c29c37e
--- /dev/null
+++ b/libziparchive/test_ziparchive_large.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Unittests for parsing files in zip64 format"""
+
+import os
+import subprocess
+import tempfile
+import unittest
+import zipfile
+import time
+
+class Zip64Test(unittest.TestCase):
+ @staticmethod
+ def _AddEntriesToZip(output_zip, entries_dict=None):
+ for name, size in entries_dict.items():
+ contents = name[0] * 1024
+ file_path = tempfile.NamedTemporaryFile()
+ with open(file_path.name, 'w') as f:
+ for it in range(0, size):
+ f.write(contents)
+ output_zip.write(file_path.name, arcname = name)
+
+ def _getEntryNames(self, zip_name):
+ cmd = ['ziptool', 'zipinfo', '-1', zip_name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ output, _ = proc.communicate()
+ self.assertEquals(0, proc.returncode)
+ self.assertNotEqual(None, output)
+ return output.split()
+
+ def _ExtractEntries(self, zip_name):
+ temp_dir = tempfile.mkdtemp()
+ cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc.communicate()
+ self.assertEquals(0, proc.returncode)
+
+ def test_entriesSmallerThan2G(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
+ # than 4GiB in size.
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
+ 'd.txt': 1025 * 1024, 'e.txt': 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+ def test_largeNumberOfEntries(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ entry_dict = {}
+ # Add 100k entries (more than 65535|UINT16_MAX).
+ for num in range(0, 100 * 1024):
+ entry_dict[str(num)] = 50
+
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+ def test_largeCompressedEntries(self):
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
+ allowZip64=True) as output_zip:
+ # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
+ # sizes in the extra field. Test if our ziptool should be able to parse it.
+ entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
+
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
+
+
+if __name__ == '__main__':
+ testsuite = unittest.TestLoader().discover(
+ os.path.dirname(os.path.realpath(__file__)))
+ unittest.TextTestRunner(verbosity=2).run(testsuite)
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index afbc5d8..5b976d0 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -138,9 +138,60 @@
uint64_t cd_start_offset;
};
-static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
- ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
- return kInvalidFile;
+static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
+ off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
+ if (eocdOffset <= sizeof(Zip64EocdLocator)) {
+ ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
+ return kInvalidFile;
+ }
+ // We expect to find the zip64 eocd locator immediately before the zip eocd.
+ const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
+ Zip64EocdLocator zip64EocdLocator{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
+ sizeof(Zip64EocdLocator), locatorOffset)) {
+ ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
+ locatorOffset);
+ return kInvalidFile;
+ }
+
+ const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
+ if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
+ ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ Zip64EocdRecord zip64EocdRecord{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
+ sizeof(Zip64EocdRecord), zip64EocdOffset)) {
+ ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), static_cast<int64_t>(zip64EocdOffset), debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
+ zip64EocdOffset);
+ return kInvalidFile;
+ }
+
+ if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
+ ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
+ ", zip64 eocd offset %" PRIu64,
+ debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ *cdInfo = {.num_records = zip64EocdRecord.num_records,
+ .cd_size = zip64EocdRecord.cd_size,
+ .cd_start_offset = zip64EocdRecord.cd_start_offset};
+
+ return kSuccess;
}
static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
@@ -194,7 +245,7 @@
if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
eocd->cd_size, eocd->cd_start_offset);
- return FindCentralDirectoryInfoForZip64(cdInfo);
+ return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
}
/*
@@ -290,13 +341,104 @@
return kSuccess;
}
+static ZipError ParseZip64ExtendedInfoInExtraField(
+ const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
+ uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
+ Zip64ExtendedInfo* zip64Info) {
+ if (extraFieldLength <= 4) {
+ ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
+ extraFieldLength);
+ return kInvalidFile;
+ }
+
+ // Each header MUST consist of:
+ // Header ID - 2 bytes
+ // Data Size - 2 bytes
+ uint16_t offset = 0;
+ while (offset < extraFieldLength - 4) {
+ auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
+ auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
+
+ offset += 4;
+ if (dataSize > extraFieldLength - offset) {
+ ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
+ return kInvalidOffset;
+ }
+
+ // Skip the other types of extensible data fields. Details in
+ // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
+ if (headerId != Zip64ExtendedInfo::kHeaderId) {
+ offset += dataSize;
+ continue;
+ }
+
+ uint16_t expectedDataSize = 0;
+ // We expect the extended field to include both uncompressed and compressed size.
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ expectedDataSize += 16;
+ }
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ expectedDataSize += 8;
+ }
+
+ if (expectedDataSize == 0) {
+ ALOGW("Zip: Data size should not be 0 in zip64 extended field");
+ return kInvalidFile;
+ }
+
+ if (dataSize != expectedDataSize) {
+ auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
+ ? std::to_string(zip32LocalFileHeaderOffset.value())
+ : "missing";
+ ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
+ ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
+ expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
+ localOffsetString.c_str());
+ return kInvalidFile;
+ }
+
+ std::optional<uint64_t> uncompressedFileSize;
+ std::optional<uint64_t> compressedFileSize;
+ std::optional<uint64_t> localHeaderOffset;
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
+ compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
+ offset += 16;
+
+ // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
+ // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
+ // bytes counter to replace stream.total_out.
+ if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
+ ALOGW(
+ "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
+ ", compressed size %" PRIu64,
+ uncompressedFileSize.value(), compressedFileSize.value());
+ return kInvalidFile;
+ }
+ }
+
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
+ offset += 8;
+ }
+
+ zip64Info->uncompressed_file_size = uncompressedFileSize;
+ zip64Info->compressed_file_size = compressedFileSize;
+ zip64Info->local_header_offset = localHeaderOffset;
+ return kSuccess;
+ }
+
+ ALOGW("Zip: zip64 extended info isn't found in the extra field.");
+ return kInvalidFile;
+}
+
/*
* Parses the Zip archive's Central Directory. Allocates and populates the
* hash table.
*
* Returns 0 on success.
*/
-static int32_t ParseZipArchive(ZipArchive* archive) {
+static ZipError ParseZipArchive(ZipArchive* archive) {
const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
const size_t cd_length = archive->central_directory.GetMapLength();
const uint64_t num_entries = archive->num_entries;
@@ -326,7 +468,7 @@
return kInvalidFile;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
return kInvalidFile;
@@ -354,9 +496,15 @@
off64_t local_header_offset = cdr->local_file_header_offset;
if (local_header_offset == UINT32_MAX) {
- // TODO(xunchang) parse the zip64 eocd
- ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
- return kInvalidFile;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
}
if (local_header_offset >= archive->directory_offset) {
@@ -404,7 +552,7 @@
ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
- return 0;
+ return kSuccess;
}
static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@@ -521,7 +669,7 @@
return kInvalidOffset;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
// The offset of the start of the central directory in the zipfile.
// We keep this lying around so that we can sanity check all our lengths
@@ -545,8 +693,27 @@
// the extra field.
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
cdr->local_file_header_offset == UINT32_MAX) {
- ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
- return kInvalidFile;
+ const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
+ data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
+ data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
+ }
+
+ if (local_header_offset == UINT32_MAX) {
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
+ }
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -561,14 +728,68 @@
return kIoError;
}
- const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
-
+ auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
if (lfh->lfh_signature != LocalFileHeader::kSignature) {
ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
static_cast<int64_t>(local_header_offset));
return kInvalidOffset;
}
+ // Check that the local file header name matches the declared name in the central directory.
+ CHECK_LE(entryName.size(), UINT16_MAX);
+ auto nameLen = static_cast<uint16_t>(entryName.size());
+ if (lfh->file_name_length != nameLen) {
+ ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
+ std::string(entryName).c_str(), lfh->file_name_length, nameLen);
+ return kInconsistentInformation;
+ }
+ const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
+ if (name_offset > cd_offset - lfh->file_name_length) {
+ ALOGW("Zip: lfh name has invalid declared length");
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> name_buf(nameLen);
+ if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
+ ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
+ return kIoError;
+ }
+ if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
+ ALOGW("Zip: lfh name did not match central directory");
+ return kInconsistentInformation;
+ }
+
+ uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
+ uint64_t lfh_compressed_size = lfh->compressed_size;
+ if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
+ const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
+ const uint16_t lfh_extra_field_size = lfh->extra_field_length;
+ if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
+ ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
+ if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
+ lfh_extra_field_offset)) {
+ ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
+ return kIoError;
+ }
+
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
+ lfh->compressed_size, std::nullopt, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
+ lfh_compressed_size = zip64_info.compressed_file_size.value();
+ }
+
// Paranoia: Match the values specified in the local file header
// to those specified in the central directory.
@@ -594,12 +815,12 @@
// header agree on the crc, compressed, and uncompressed sizes of the entry.
if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
data->has_data_descriptor = 0;
- if (data->compressed_length != lfh->compressed_size ||
- data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
+ if (data->compressed_length != lfh_compressed_size ||
+ data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
- "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
- data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
- lfh->uncompressed_size, lfh->crc32);
+ "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
+ data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
+ lfh_uncompressed_size, lfh->crc32);
return kInconsistentInformation;
}
} else {
@@ -622,30 +843,6 @@
// Currently only needed to implement zipinfo.
data->is_text = (cdr->internal_file_attributes & 1);
- // Check that the local file header name matches the declared
- // name in the central directory.
- CHECK_LE(entryName.size(), UINT16_MAX);
- auto nameLen = static_cast<uint16_t>(entryName.size());
- if (lfh->file_name_length != nameLen) {
- ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
- std::string(entryName).c_str(), lfh->file_name_length, nameLen);
- return kInconsistentInformation;
- }
- const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
- if (name_offset + lfh->file_name_length > cd_offset) {
- ALOGW("Zip: lfh name has invalid declared length");
- return kInvalidOffset;
- }
- std::vector<uint8_t> name_buf(nameLen);
- if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
- ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
- return kIoError;
- }
- if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
- ALOGW("Zip: lfh name did not match central directory");
- return kInconsistentInformation;
- }
-
const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
lfh->file_name_length + lfh->extra_field_length;
if (data_offset > cd_offset) {
diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc
index 5caca8a..10050da 100644
--- a/libziparchive/zip_archive_test.cc
+++ b/libziparchive/zip_archive_test.cc
@@ -14,8 +14,6 @@
* limitations under the License.
*/
-#include "zip_archive_private.h"
-
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
@@ -23,6 +21,7 @@
#include <string.h>
#include <unistd.h>
+#include <map>
#include <memory>
#include <set>
#include <string_view>
@@ -31,12 +30,16 @@
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/mapped_file.h>
+#include <android-base/memory.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
#include <ziparchive/zip_archive.h>
#include <ziparchive/zip_archive_stream_entry.h>
+#include "zip_archive_common.h"
+#include "zip_archive_private.h"
+
static std::string test_data_dir = android::base::GetExecutableDirectory() + "/testdata";
static const std::string kValidZip = "valid.zip";
@@ -930,3 +933,290 @@
ASSERT_EQ(0u, writer.GetOutput().size());
}
}
+
+// The class constructs a zipfile with zip64 format, and test the parsing logic.
+class Zip64ParseTest : public ::testing::Test {
+ protected:
+ struct LocalFileEntry {
+ std::vector<uint8_t> local_file_header;
+ std::string file_name;
+ std::vector<uint8_t> extended_field;
+ // Fake data to mimic the compressed bytes in the zipfile.
+ std::vector<uint8_t> compressed_bytes;
+
+ size_t GetSize() const {
+ return local_file_header.size() + file_name.size() + extended_field.size() +
+ compressed_bytes.size();
+ }
+
+ void CopyToOutput(std::vector<uint8_t>* output) const {
+ std::copy(local_file_header.begin(), local_file_header.end(), std::back_inserter(*output));
+ std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
+ std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
+ std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output));
+ }
+ };
+
+ struct CdRecordEntry {
+ std::vector<uint8_t> central_directory_record;
+ std::string file_name;
+ std::vector<uint8_t> extended_field;
+
+ size_t GetSize() const {
+ return central_directory_record.size() + file_name.size() + extended_field.size();
+ }
+
+ void CopyToOutput(std::vector<uint8_t>* output) const {
+ std::copy(central_directory_record.begin(), central_directory_record.end(),
+ std::back_inserter(*output));
+ std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
+ std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
+ }
+ };
+
+ static void ConstructLocalFileHeader(const std::string& name, std::vector<uint8_t>* output,
+ uint32_t uncompressed_size, uint32_t compressed_size) {
+ LocalFileHeader lfh = {};
+ lfh.lfh_signature = LocalFileHeader::kSignature;
+ lfh.compressed_size = compressed_size;
+ lfh.uncompressed_size = uncompressed_size;
+ lfh.file_name_length = static_cast<uint16_t>(name.size());
+ lfh.extra_field_length = 20;
+ *output = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&lfh),
+ reinterpret_cast<uint8_t*>(&lfh) + sizeof(LocalFileHeader));
+ }
+
+ // Put one zip64 extended info in the extended field.
+ static void ConstructExtendedField(const std::vector<uint64_t>& zip64_fields,
+ std::vector<uint8_t>* output) {
+ ASSERT_FALSE(zip64_fields.empty());
+ uint16_t data_size = 8 * static_cast<uint16_t>(zip64_fields.size());
+ std::vector<uint8_t> extended_field(data_size + 4);
+ android::base::put_unaligned(extended_field.data(), Zip64ExtendedInfo::kHeaderId);
+ android::base::put_unaligned(extended_field.data() + 2, data_size);
+ size_t offset = 4;
+ for (const auto& field : zip64_fields) {
+ android::base::put_unaligned(extended_field.data() + offset, field);
+ offset += 8;
+ }
+
+ *output = std::move(extended_field);
+ }
+
+ static void ConstructCentralDirectoryRecord(const std::string& name, uint32_t uncompressed_size,
+ uint32_t compressed_size, uint32_t local_offset,
+ std::vector<uint8_t>* output) {
+ CentralDirectoryRecord cdr = {};
+ cdr.record_signature = CentralDirectoryRecord::kSignature;
+ cdr.compressed_size = uncompressed_size;
+ cdr.uncompressed_size = compressed_size;
+ cdr.file_name_length = static_cast<uint16_t>(name.size());
+ cdr.extra_field_length = local_offset == UINT32_MAX ? 28 : 20;
+ cdr.local_file_header_offset = local_offset;
+ *output =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&cdr),
+ reinterpret_cast<uint8_t*>(&cdr) + sizeof(CentralDirectoryRecord));
+ }
+
+ // Add an entry to the zipfile, construct the corresponding local header and cd entry.
+ void AddEntry(const std::string& name, const std::vector<uint8_t>& content,
+ bool uncompressed_size_in_extended, bool compressed_size_in_extended,
+ bool local_offset_in_extended) {
+ auto uncompressed_size = static_cast<uint32_t>(content.size());
+ auto compressed_size = static_cast<uint32_t>(content.size());
+ uint32_t local_file_header_offset = 0;
+ std::for_each(file_entries_.begin(), file_entries_.end(),
+ [&local_file_header_offset](const LocalFileEntry& file_entry) {
+ local_file_header_offset += file_entry.GetSize();
+ });
+
+ std::vector<uint64_t> zip64_fields;
+ if (uncompressed_size_in_extended) {
+ zip64_fields.push_back(uncompressed_size);
+ uncompressed_size = UINT32_MAX;
+ }
+ if (compressed_size_in_extended) {
+ zip64_fields.push_back(compressed_size);
+ compressed_size = UINT32_MAX;
+ }
+ LocalFileEntry local_entry = {
+ .local_file_header = {},
+ .file_name = name,
+ .extended_field = {},
+ .compressed_bytes = content,
+ };
+ ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size,
+ compressed_size);
+ ConstructExtendedField(zip64_fields, &local_entry.extended_field);
+ file_entries_.push_back(std::move(local_entry));
+
+ if (local_offset_in_extended) {
+ zip64_fields.push_back(local_file_header_offset);
+ local_file_header_offset = UINT32_MAX;
+ }
+ CdRecordEntry cd_entry = {
+ .central_directory_record = {},
+ .file_name = name,
+ .extended_field = {},
+ };
+ ConstructCentralDirectoryRecord(name, uncompressed_size, compressed_size,
+ local_file_header_offset, &cd_entry.central_directory_record);
+ ConstructExtendedField(zip64_fields, &cd_entry.extended_field);
+ cd_entries_.push_back(std::move(cd_entry));
+ }
+
+ void ConstructEocd() {
+ ASSERT_EQ(file_entries_.size(), cd_entries_.size());
+ Zip64EocdRecord zip64_eocd = {};
+ zip64_eocd.record_signature = Zip64EocdRecord::kSignature;
+ zip64_eocd.num_records = file_entries_.size();
+ zip64_eocd.cd_size = 0;
+ std::for_each(
+ cd_entries_.begin(), cd_entries_.end(),
+ [&zip64_eocd](const CdRecordEntry& cd_entry) { zip64_eocd.cd_size += cd_entry.GetSize(); });
+ zip64_eocd.cd_start_offset = 0;
+ std::for_each(file_entries_.begin(), file_entries_.end(),
+ [&zip64_eocd](const LocalFileEntry& file_entry) {
+ zip64_eocd.cd_start_offset += file_entry.GetSize();
+ });
+ zip64_eocd_record_ =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_eocd),
+ reinterpret_cast<uint8_t*>(&zip64_eocd) + sizeof(Zip64EocdRecord));
+
+ Zip64EocdLocator zip64_locator = {};
+ zip64_locator.locator_signature = Zip64EocdLocator::kSignature;
+ zip64_locator.zip64_eocd_offset = zip64_eocd.cd_start_offset + zip64_eocd.cd_size;
+ zip64_eocd_locator_ =
+ std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_locator),
+ reinterpret_cast<uint8_t*>(&zip64_locator) + sizeof(Zip64EocdLocator));
+
+ EocdRecord eocd = {};
+ eocd.eocd_signature = EocdRecord::kSignature,
+ eocd.num_records = file_entries_.size() > UINT16_MAX
+ ? UINT16_MAX
+ : static_cast<uint16_t>(file_entries_.size());
+ eocd.cd_size = UINT32_MAX;
+ eocd.cd_start_offset = UINT32_MAX;
+ eocd_record_ = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&eocd),
+ reinterpret_cast<uint8_t*>(&eocd) + sizeof(EocdRecord));
+ }
+
+ // Concatenate all the local file entries, cd entries, and eocd metadata.
+ void ConstructZipFile() {
+ for (const auto& file_entry : file_entries_) {
+ file_entry.CopyToOutput(&zip_content_);
+ }
+ for (const auto& cd_entry : cd_entries_) {
+ cd_entry.CopyToOutput(&zip_content_);
+ }
+ std::copy(zip64_eocd_record_.begin(), zip64_eocd_record_.end(),
+ std::back_inserter(zip_content_));
+ std::copy(zip64_eocd_locator_.begin(), zip64_eocd_locator_.end(),
+ std::back_inserter(zip_content_));
+ std::copy(eocd_record_.begin(), eocd_record_.end(), std::back_inserter(zip_content_));
+ }
+
+ std::vector<uint8_t> zip_content_;
+
+ std::vector<LocalFileEntry> file_entries_;
+ std::vector<CdRecordEntry> cd_entries_;
+ std::vector<uint8_t> zip64_eocd_record_;
+ std::vector<uint8_t> zip64_eocd_locator_;
+ std::vector<uint8_t> eocd_record_;
+};
+
+TEST_F(Zip64ParseTest, openFile) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFilelocalOffsetInExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, true);
+ AddEntry("b.txt", std::vector<uint8_t>(200, 'b'), true, true, true);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFileCompressedNotInExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, false, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ // Zip64 extended fields must include both uncompressed and compressed size.
+ ASSERT_NE(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, findEntry) {
+ AddEntry("a.txt", std::vector<uint8_t>(200, 'a'), true, true, true);
+ AddEntry("b.txt", std::vector<uint8_t>(300, 'b'), true, true, false);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry));
+ ASSERT_EQ(200, entry.uncompressed_length);
+ ASSERT_EQ(200, entry.compressed_length);
+
+ ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry));
+ ASSERT_EQ(300, entry.uncompressed_length);
+ ASSERT_EQ(300, entry.compressed_length);
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, openFileIncorrectDataSizeInLocalExtendedField) {
+ AddEntry("a.txt", std::vector<uint8_t>(100, 'a'), true, true, false);
+ ASSERT_EQ(1, file_entries_.size());
+ auto& extended_field = file_entries_[0].extended_field;
+ // data size exceeds the extended field size in local header.
+ android::base::put_unaligned<uint16_t>(extended_field.data() + 2, 30);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_NE(0, FindEntry(handle, "a.txt", &entry));
+
+ CloseArchive(handle);
+}
+
+TEST_F(Zip64ParseTest, iterates) {
+ std::set<std::string_view> names{"a.txt", "b.txt", "c.txt", "d.txt", "e.txt"};
+ for (const auto& name : names) {
+ AddEntry(std::string(name), std::vector<uint8_t>(100, name[0]), true, true, true);
+ }
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+
+ void* iteration_cookie;
+ ASSERT_EQ(0, StartIteration(handle, &iteration_cookie));
+ std::set<std::string_view> result;
+ std::string_view name;
+ ZipEntry entry;
+ while (Next(iteration_cookie, &entry, &name) == 0) result.emplace(name);
+ ASSERT_EQ(names, result);
+
+ CloseArchive(handle);
+}