Implement the functions to parse zip64 structs
Implement the logic to parse zip64 eocd and zip64 extended info
in the extra field. Also add unit tests and python tests which
create packages larger than 4GiB.
The extraction of zip entry size > 4GiB will be supported in the follow
ups.
Bug: 150900468
Test: unit tests pass
Change-Id: I4cd9ebbd9709b3d2f9cd293625d2c79024bb45a5
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index afbc5d8..5b976d0 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -138,9 +138,60 @@
uint64_t cd_start_offset;
};
-static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
- ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
- return kInvalidFile;
+static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
+ off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
+ if (eocdOffset <= sizeof(Zip64EocdLocator)) {
+ ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
+ return kInvalidFile;
+ }
+ // We expect to find the zip64 eocd locator immediately before the zip eocd.
+ const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
+ Zip64EocdLocator zip64EocdLocator{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
+ sizeof(Zip64EocdLocator), locatorOffset)) {
+ ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
+ locatorOffset);
+ return kInvalidFile;
+ }
+
+ const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
+ if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
+ ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ Zip64EocdRecord zip64EocdRecord{};
+ if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
+ sizeof(Zip64EocdRecord), zip64EocdOffset)) {
+ ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
+ sizeof(Zip64EocdLocator), static_cast<int64_t>(zip64EocdOffset), debugFileName);
+ return kIoError;
+ }
+
+ if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
+ ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
+ zip64EocdOffset);
+ return kInvalidFile;
+ }
+
+ if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
+ ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
+ ", zip64 eocd offset %" PRIu64,
+ debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
+ return kInvalidOffset;
+ }
+
+ *cdInfo = {.num_records = zip64EocdRecord.num_records,
+ .cd_size = zip64EocdRecord.cd_size,
+ .cd_start_offset = zip64EocdRecord.cd_start_offset};
+
+ return kSuccess;
}
static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
@@ -194,7 +245,7 @@
if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
eocd->cd_size, eocd->cd_start_offset);
- return FindCentralDirectoryInfoForZip64(cdInfo);
+ return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
}
/*
@@ -290,13 +341,104 @@
return kSuccess;
}
+static ZipError ParseZip64ExtendedInfoInExtraField(
+ const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
+ uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
+ Zip64ExtendedInfo* zip64Info) {
+ if (extraFieldLength <= 4) {
+ ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
+ extraFieldLength);
+ return kInvalidFile;
+ }
+
+ // Each header MUST consist of:
+ // Header ID - 2 bytes
+ // Data Size - 2 bytes
+ uint16_t offset = 0;
+ while (offset < extraFieldLength - 4) {
+ auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
+ auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
+
+ offset += 4;
+ if (dataSize > extraFieldLength - offset) {
+ ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
+ return kInvalidOffset;
+ }
+
+ // Skip the other types of extensible data fields. Details in
+ // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
+ if (headerId != Zip64ExtendedInfo::kHeaderId) {
+ offset += dataSize;
+ continue;
+ }
+
+ uint16_t expectedDataSize = 0;
+ // We expect the extended field to include both uncompressed and compressed size.
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ expectedDataSize += 16;
+ }
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ expectedDataSize += 8;
+ }
+
+ if (expectedDataSize == 0) {
+ ALOGW("Zip: Data size should not be 0 in zip64 extended field");
+ return kInvalidFile;
+ }
+
+ if (dataSize != expectedDataSize) {
+ auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
+ ? std::to_string(zip32LocalFileHeaderOffset.value())
+ : "missing";
+ ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
+ ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
+ expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
+ localOffsetString.c_str());
+ return kInvalidFile;
+ }
+
+ std::optional<uint64_t> uncompressedFileSize;
+ std::optional<uint64_t> compressedFileSize;
+ std::optional<uint64_t> localHeaderOffset;
+ if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
+ uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
+ compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
+ offset += 16;
+
+ // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
+ // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
+ // bytes counter to replace stream.total_out.
+ if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
+ ALOGW(
+ "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
+ ", compressed size %" PRIu64,
+ uncompressedFileSize.value(), compressedFileSize.value());
+ return kInvalidFile;
+ }
+ }
+
+ if (zip32LocalFileHeaderOffset == UINT32_MAX) {
+ localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
+ offset += 8;
+ }
+
+ zip64Info->uncompressed_file_size = uncompressedFileSize;
+ zip64Info->compressed_file_size = compressedFileSize;
+ zip64Info->local_header_offset = localHeaderOffset;
+ return kSuccess;
+ }
+
+ ALOGW("Zip: zip64 extended info isn't found in the extra field.");
+ return kInvalidFile;
+}
+
/*
* Parses the Zip archive's Central Directory. Allocates and populates the
* hash table.
*
* Returns 0 on success.
*/
-static int32_t ParseZipArchive(ZipArchive* archive) {
+static ZipError ParseZipArchive(ZipArchive* archive) {
const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
const size_t cd_length = archive->central_directory.GetMapLength();
const uint64_t num_entries = archive->num_entries;
@@ -326,7 +468,7 @@
return kInvalidFile;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
return kInvalidFile;
@@ -354,9 +496,15 @@
off64_t local_header_offset = cdr->local_file_header_offset;
if (local_header_offset == UINT32_MAX) {
- // TODO(xunchang) parse the zip64 eocd
- ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
- return kInvalidFile;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
}
if (local_header_offset >= archive->directory_offset) {
@@ -404,7 +552,7 @@
ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
- return 0;
+ return kSuccess;
}
static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@@ -521,7 +669,7 @@
return kInvalidOffset;
}
- const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
// The offset of the start of the central directory in the zipfile.
// We keep this lying around so that we can sanity check all our lengths
@@ -545,8 +693,27 @@
// the extra field.
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
cdr->local_file_header_offset == UINT32_MAX) {
- ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
- return kInvalidFile;
+ const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
+ cdr->local_file_header_offset, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
+ data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
+ data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
+ }
+
+ if (local_header_offset == UINT32_MAX) {
+ CHECK(zip64_info.local_header_offset.has_value());
+ local_header_offset = zip64_info.local_header_offset.value();
+ }
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -561,14 +728,68 @@
return kIoError;
}
- const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
-
+ auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
if (lfh->lfh_signature != LocalFileHeader::kSignature) {
ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
static_cast<int64_t>(local_header_offset));
return kInvalidOffset;
}
+ // Check that the local file header name matches the declared name in the central directory.
+ CHECK_LE(entryName.size(), UINT16_MAX);
+ auto nameLen = static_cast<uint16_t>(entryName.size());
+ if (lfh->file_name_length != nameLen) {
+ ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
+ std::string(entryName).c_str(), lfh->file_name_length, nameLen);
+ return kInconsistentInformation;
+ }
+ const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
+ if (name_offset > cd_offset - lfh->file_name_length) {
+ ALOGW("Zip: lfh name has invalid declared length");
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> name_buf(nameLen);
+ if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
+ ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
+ return kIoError;
+ }
+ if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
+ ALOGW("Zip: lfh name did not match central directory");
+ return kInconsistentInformation;
+ }
+
+ uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
+ uint64_t lfh_compressed_size = lfh->compressed_size;
+ if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
+ const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
+ const uint16_t lfh_extra_field_size = lfh->extra_field_length;
+ if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
+ ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
+ return kInvalidOffset;
+ }
+
+ std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
+ if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
+ lfh_extra_field_offset)) {
+ ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
+ return kIoError;
+ }
+
+ Zip64ExtendedInfo zip64_info{};
+ if (auto status = ParseZip64ExtendedInfoInExtraField(
+ local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
+ lfh->compressed_size, std::nullopt, &zip64_info);
+ status != kSuccess) {
+ return status;
+ }
+
+ CHECK(zip64_info.uncompressed_file_size.has_value());
+ CHECK(zip64_info.compressed_file_size.has_value());
+ lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
+ lfh_compressed_size = zip64_info.compressed_file_size.value();
+ }
+
// Paranoia: Match the values specified in the local file header
// to those specified in the central directory.
@@ -594,12 +815,12 @@
// header agree on the crc, compressed, and uncompressed sizes of the entry.
if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
data->has_data_descriptor = 0;
- if (data->compressed_length != lfh->compressed_size ||
- data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
+ if (data->compressed_length != lfh_compressed_size ||
+ data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
- "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
- data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
- lfh->uncompressed_size, lfh->crc32);
+ "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
+ data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
+ lfh_uncompressed_size, lfh->crc32);
return kInconsistentInformation;
}
} else {
@@ -622,30 +843,6 @@
// Currently only needed to implement zipinfo.
data->is_text = (cdr->internal_file_attributes & 1);
- // Check that the local file header name matches the declared
- // name in the central directory.
- CHECK_LE(entryName.size(), UINT16_MAX);
- auto nameLen = static_cast<uint16_t>(entryName.size());
- if (lfh->file_name_length != nameLen) {
- ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
- std::string(entryName).c_str(), lfh->file_name_length, nameLen);
- return kInconsistentInformation;
- }
- const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
- if (name_offset + lfh->file_name_length > cd_offset) {
- ALOGW("Zip: lfh name has invalid declared length");
- return kInvalidOffset;
- }
- std::vector<uint8_t> name_buf(nameLen);
- if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
- ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
- return kIoError;
- }
- if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
- ALOGW("Zip: lfh name did not match central directory");
- return kInconsistentInformation;
- }
-
const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
lfh->file_name_length + lfh->extra_field_length;
if (data_offset > cd_offset) {