Support parsing of data descriptor
The size fields in the data descriptor can be either 4 bytes or 8 bytes.
This depends on if the size are read from the zip64 extended field in
the local file header. This cl adds support to parse these cases.
Also fix a misconception in that the uncompressed and compressed size
doesn't need to exist together in the zip64 fields of the central
directory. But they still need to co-exist in the fields of the local
file header.
Bug: 150900468
Test: unit tests pass, python tests pass
Change-Id: Ia54f9bf56c85ff456ead90a136f7fddc5be5220c
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 849b68c..031d43a 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -57,8 +57,6 @@
#include "zip_archive_common.h"
#include "zip_archive_private.h"
-using android::base::get_unaligned;
-
// Used to turn on crc checks - verify that the content CRC matches the values
// specified in the local file header and the central directory.
static const bool kCrcChecksEnabled = false;
@@ -221,7 +219,7 @@
for (; i >= 0; i--) {
if (scan_buffer[i] == 0x50) {
uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
- if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
+ if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
ALOGV("+++ Found EOCD at buf+%d", i);
break;
}
@@ -360,8 +358,9 @@
// Data Size - 2 bytes
uint16_t offset = 0;
while (offset < extraFieldLength - 4) {
- auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
- auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
+ auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
+ auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
+ auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);
offset += 4;
if (dataSize > extraFieldLength - offset) {
@@ -376,54 +375,44 @@
continue;
}
- uint16_t expectedDataSize = 0;
- // We expect the extended field to include both uncompressed and compressed size.
- if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
- expectedDataSize += 16;
+ std::optional<uint64_t> uncompressedFileSize;
+ std::optional<uint64_t> compressedFileSize;
+ std::optional<uint64_t> localHeaderOffset;
+ if (zip32UncompressedSize == UINT32_MAX) {
+ uncompressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
+ }
+ if (zip32CompressedSize == UINT32_MAX) {
+ compressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
}
if (zip32LocalFileHeaderOffset == UINT32_MAX) {
- expectedDataSize += 8;
+ localHeaderOffset = ConsumeUnaligned<uint64_t>(&readPtr);
}
- if (expectedDataSize == 0) {
+ // calculate how many bytes we read after the data size field.
+ size_t bytesRead = readPtr - (extraFieldStart + offset);
+ if (bytesRead == 0) {
ALOGW("Zip: Data size should not be 0 in zip64 extended field");
return kInvalidFile;
}
- if (dataSize != expectedDataSize) {
+ if (dataSize != bytesRead) {
auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
? std::to_string(zip32LocalFileHeaderOffset.value())
: "missing";
- ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
+ ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
- expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
+ bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
localOffsetString.c_str());
return kInvalidFile;
}
- std::optional<uint64_t> uncompressedFileSize;
- std::optional<uint64_t> compressedFileSize;
- std::optional<uint64_t> localHeaderOffset;
- if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
- uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
- compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
- offset += 16;
-
- // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
- // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
- // bytes counter to replace stream.total_out.
- if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
- ALOGW(
- "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
- ", compressed size %" PRIu64,
- uncompressedFileSize.value(), compressedFileSize.value());
- return kInvalidFile;
- }
- }
-
- if (zip32LocalFileHeaderOffset == UINT32_MAX) {
- localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
- offset += 8;
+ // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
+ // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
+ // bytes counter to replace stream.total_out.
+ if ((uncompressedFileSize.has_value() && uncompressedFileSize.value() > UINT32_MAX) ||
+ (compressedFileSize.has_value() && compressedFileSize.value() > UINT32_MAX)) {
+ ALOGW("Zip: File size larger than UINT32_MAX isn't supported yet");
+ return kInvalidFile;
}
zip64Info->uncompressed_file_size = uncompressedFileSize;
@@ -625,7 +614,8 @@
}
static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
- uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
+ // Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
+ uint8_t ddBuf[24];
off64_t offset = entry->offset;
if (entry->method != kCompressStored) {
offset += entry->compressed_length;
@@ -638,18 +628,26 @@
}
const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
- const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
- const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
+ uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddBuf + 4 : ddBuf;
+ DataDescriptor descriptor{};
+ descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ if (entry->zip64_format_size) {
+ descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
+ descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
+ } else {
+ descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ }
// Validate that the values in the data descriptor match those in the central
// directory.
- if (entry->compressed_length != descriptor->compressed_size ||
- entry->uncompressed_length != descriptor->uncompressed_size ||
- entry->crc32 != descriptor->crc32) {
+ if (entry->compressed_length != descriptor.compressed_size ||
+ entry->uncompressed_length != descriptor.uncompressed_size ||
+ entry->crc32 != descriptor.crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
- "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
+ "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
entry->compressed_length, entry->uncompressed_length, entry->crc32,
- descriptor->compressed_size, descriptor->uncompressed_size, descriptor->crc32);
+ descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
return kInconsistentInformation;
}
@@ -706,18 +704,14 @@
return status;
}
- if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
- CHECK(zip64_info.uncompressed_file_size.has_value());
- CHECK(zip64_info.compressed_file_size.has_value());
- // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
- data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
- data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
- }
-
- if (local_header_offset == UINT32_MAX) {
- CHECK(zip64_info.local_header_offset.has_value());
- local_header_offset = zip64_info.local_header_offset.value();
- }
+ // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
+ data->uncompressed_length =
+ static_cast<uint32_t>(zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size));
+ data->compressed_length =
+ static_cast<uint32_t>(zip64_info.compressed_file_size.value_or(cdr->compressed_size));
+ local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
+ data->zip64_format_size =
+ cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -766,6 +760,13 @@
uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
uint64_t lfh_compressed_size = lfh->compressed_size;
if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
+ if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
+ ALOGW(
+ "Zip: The zip64 extended field in the local header MUST include BOTH original and "
+ "compressed file size fields.");
+ return kInvalidFile;
+ }
+
const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
const uint16_t lfh_extra_field_size = lfh->extra_field_length;
if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {