Support parsing of data descriptor
The size fields in the data descriptor can be either 4 bytes or 8 bytes.
This depends on if the size are read from the zip64 extended field in
the local file header. This cl adds support to parse these cases.
Also fix a misconception in that the uncompressed and compressed size
doesn't need to exist together in the zip64 fields of the central
directory. But they still need to co-exist in the fields of the local
file header.
Bug: 150900468
Test: unit tests pass, python tests pass
Change-Id: Ia54f9bf56c85ff456ead90a136f7fddc5be5220c
diff --git a/libziparchive/include/ziparchive/zip_archive.h b/libziparchive/include/ziparchive/zip_archive.h
index 435bfb6..4697bb7 100644
--- a/libziparchive/include/ziparchive/zip_archive.h
+++ b/libziparchive/include/ziparchive/zip_archive.h
@@ -77,6 +77,10 @@
// footer.
uint32_t uncompressed_length;
+ // If the value of uncompressed length and compressed length are stored in
+ // the zip64 extended info of the extra field.
+ bool zip64_format_size{false};
+
// The offset to the start of data for this ZipEntry.
off64_t offset;
diff --git a/libziparchive/test_ziparchive_large.py b/libziparchive/test_ziparchive_large.py
index c29c37e..6b82f63 100644
--- a/libziparchive/test_ziparchive_large.py
+++ b/libziparchive/test_ziparchive_large.py
@@ -26,13 +26,17 @@
class Zip64Test(unittest.TestCase):
@staticmethod
+ def _WriteFile(path, size_in_kib):
+ contents = os.path.basename(path)[0] * 1024
+ with open(path, 'w') as f:
+ for it in range(0, size_in_kib):
+ f.write(contents)
+
+ @staticmethod
def _AddEntriesToZip(output_zip, entries_dict=None):
for name, size in entries_dict.items():
- contents = name[0] * 1024
file_path = tempfile.NamedTemporaryFile()
- with open(file_path.name, 'w') as f:
- for it in range(0, size):
- f.write(contents)
+ Zip64Test._WriteFile(file_path.name, size)
output_zip.write(file_path.name, arcname = name)
def _getEntryNames(self, zip_name):
@@ -93,6 +97,22 @@
self._ExtractEntries(zip_path.name)
+ def test_forceDataDescriptor(self):
+ file_path = tempfile.NamedTemporaryFile(suffix='.txt')
+ # TODO create the entry > 4GiB.
+ self._WriteFile(file_path.name, 1024)
+
+ zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ pass
+ # The fd option force writes a data descriptor
+ cmd = ['zip', '-fd', zip_path.name, file_path.name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc.communicate()
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEquals([file_path.name[1:]], read_names)
+ self._ExtractEntries(zip_path.name)
+
if __name__ == '__main__':
testsuite = unittest.TestLoader().discover(
os.path.dirname(os.path.realpath(__file__)))
diff --git a/libziparchive/testdata/zip64.zip b/libziparchive/testdata/zip64.zip
new file mode 100644
index 0000000..3f25a4c
--- /dev/null
+++ b/libziparchive/testdata/zip64.zip
Binary files differ
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 849b68c..031d43a 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -57,8 +57,6 @@
#include "zip_archive_common.h"
#include "zip_archive_private.h"
-using android::base::get_unaligned;
-
// Used to turn on crc checks - verify that the content CRC matches the values
// specified in the local file header and the central directory.
static const bool kCrcChecksEnabled = false;
@@ -221,7 +219,7 @@
for (; i >= 0; i--) {
if (scan_buffer[i] == 0x50) {
uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
- if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
+ if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
ALOGV("+++ Found EOCD at buf+%d", i);
break;
}
@@ -360,8 +358,9 @@
// Data Size - 2 bytes
uint16_t offset = 0;
while (offset < extraFieldLength - 4) {
- auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
- auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
+ auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
+ auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
+ auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);
offset += 4;
if (dataSize > extraFieldLength - offset) {
@@ -376,54 +375,44 @@
continue;
}
- uint16_t expectedDataSize = 0;
- // We expect the extended field to include both uncompressed and compressed size.
- if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
- expectedDataSize += 16;
+ std::optional<uint64_t> uncompressedFileSize;
+ std::optional<uint64_t> compressedFileSize;
+ std::optional<uint64_t> localHeaderOffset;
+ if (zip32UncompressedSize == UINT32_MAX) {
+ uncompressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
+ }
+ if (zip32CompressedSize == UINT32_MAX) {
+ compressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
}
if (zip32LocalFileHeaderOffset == UINT32_MAX) {
- expectedDataSize += 8;
+ localHeaderOffset = ConsumeUnaligned<uint64_t>(&readPtr);
}
- if (expectedDataSize == 0) {
+ // calculate how many bytes we read after the data size field.
+ size_t bytesRead = readPtr - (extraFieldStart + offset);
+ if (bytesRead == 0) {
ALOGW("Zip: Data size should not be 0 in zip64 extended field");
return kInvalidFile;
}
- if (dataSize != expectedDataSize) {
+ if (dataSize != bytesRead) {
auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
? std::to_string(zip32LocalFileHeaderOffset.value())
: "missing";
- ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
+ ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
- expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
+ bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
localOffsetString.c_str());
return kInvalidFile;
}
- std::optional<uint64_t> uncompressedFileSize;
- std::optional<uint64_t> compressedFileSize;
- std::optional<uint64_t> localHeaderOffset;
- if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
- uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
- compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
- offset += 16;
-
- // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
- // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
- // bytes counter to replace stream.total_out.
- if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
- ALOGW(
- "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
- ", compressed size %" PRIu64,
- uncompressedFileSize.value(), compressedFileSize.value());
- return kInvalidFile;
- }
- }
-
- if (zip32LocalFileHeaderOffset == UINT32_MAX) {
- localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
- offset += 8;
+ // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
+ // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
+ // bytes counter to replace stream.total_out.
+ if ((uncompressedFileSize.has_value() && uncompressedFileSize.value() > UINT32_MAX) ||
+ (compressedFileSize.has_value() && compressedFileSize.value() > UINT32_MAX)) {
+ ALOGW("Zip: File size larger than UINT32_MAX isn't supported yet");
+ return kInvalidFile;
}
zip64Info->uncompressed_file_size = uncompressedFileSize;
@@ -625,7 +614,8 @@
}
static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
- uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
+ // Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
+ uint8_t ddBuf[24];
off64_t offset = entry->offset;
if (entry->method != kCompressStored) {
offset += entry->compressed_length;
@@ -638,18 +628,26 @@
}
const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
- const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
- const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
+ uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddBuf + 4 : ddBuf;
+ DataDescriptor descriptor{};
+ descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ if (entry->zip64_format_size) {
+ descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
+ descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
+ } else {
+ descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
+ }
// Validate that the values in the data descriptor match those in the central
// directory.
- if (entry->compressed_length != descriptor->compressed_size ||
- entry->uncompressed_length != descriptor->uncompressed_size ||
- entry->crc32 != descriptor->crc32) {
+ if (entry->compressed_length != descriptor.compressed_size ||
+ entry->uncompressed_length != descriptor.uncompressed_size ||
+ entry->crc32 != descriptor.crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
- "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
+ "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
entry->compressed_length, entry->uncompressed_length, entry->crc32,
- descriptor->compressed_size, descriptor->uncompressed_size, descriptor->crc32);
+ descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
return kInconsistentInformation;
}
@@ -706,18 +704,14 @@
return status;
}
- if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
- CHECK(zip64_info.uncompressed_file_size.has_value());
- CHECK(zip64_info.compressed_file_size.has_value());
- // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
- data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
- data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
- }
-
- if (local_header_offset == UINT32_MAX) {
- CHECK(zip64_info.local_header_offset.has_value());
- local_header_offset = zip64_info.local_header_offset.value();
- }
+ // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
+ data->uncompressed_length =
+ static_cast<uint32_t>(zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size));
+ data->compressed_length =
+ static_cast<uint32_t>(zip64_info.compressed_file_size.value_or(cdr->compressed_size));
+ local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
+ data->zip64_format_size =
+ cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -766,6 +760,13 @@
uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
uint64_t lfh_compressed_size = lfh->compressed_size;
if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
+ if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
+ ALOGW(
+ "Zip: The zip64 extended field in the local header MUST include BOTH original and "
+ "compressed file size fields.");
+ return kInvalidFile;
+ }
+
const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
const uint16_t lfh_extra_field_size = lfh->extra_field_length;
if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
diff --git a/libziparchive/zip_archive_common.h b/libziparchive/zip_archive_common.h
index a92d4d2..d461856 100644
--- a/libziparchive/zip_archive_common.h
+++ b/libziparchive/zip_archive_common.h
@@ -165,15 +165,24 @@
// CRC-32 checksum of the entry.
uint32_t crc32;
- // Compressed size of the entry.
- uint32_t compressed_size;
- // Uncompressed size of the entry.
- uint32_t uncompressed_size;
+
+ // For ZIP64 format archives, the compressed and uncompressed sizes are 8
+ // bytes each. Also, the ZIP64 format MAY be used regardless of the size
+ // of a file. When extracting, if the zip64 extended information extra field
+ // is present for the file the compressed and uncompressed sizes will be 8
+ // byte values.
+
+ // Compressed size of the entry, the field can be either 4 bytes or 8 bytes
+ // in the zip file.
+ uint64_t compressed_size;
+ // Uncompressed size of the entry, the field can be either 4 bytes or 8 bytes
+ // in the zip file.
+ uint64_t uncompressed_size;
private:
DataDescriptor() = default;
DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
-} __attribute__((packed));
+};
// The zip64 end of central directory locator helps to find the zip64 EOCD.
struct Zip64EocdLocator {
diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h
index 5f5232f..4ed07aa 100644
--- a/libziparchive/zip_archive_private.h
+++ b/libziparchive/zip_archive_private.h
@@ -28,6 +28,7 @@
#include "android-base/macros.h"
#include "android-base/mapped_file.h"
+#include "android-base/memory.h"
#include "zip_cd_entry_map.h"
#include "zip_error.h"
@@ -104,3 +105,20 @@
bool InitializeCentralDirectory(off64_t cd_start_offset, size_t cd_size);
};
+
+int32_t ExtractToWriter(ZipArchiveHandle handle, ZipEntry* entry, zip_archive::Writer* writer);
+
+// Reads the unaligned data of type |T| and auto increment the offset.
+template <typename T>
+static T ConsumeUnaligned(uint8_t** address) {
+ auto ret = android::base::get_unaligned<T>(*address);
+ *address += sizeof(T);
+ return ret;
+}
+
+// Writes the unaligned data of type |T| and auto increment the offset.
+template <typename T>
+void EmitUnaligned(uint8_t** address, T data) {
+ android::base::put_unaligned<T>(*address, data);
+ *address += sizeof(T);
+}
diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc
index 69be3df..3563340 100644
--- a/libziparchive/zip_archive_test.cc
+++ b/libziparchive/zip_archive_test.cc
@@ -979,10 +979,11 @@
std::vector<uint8_t> extended_field;
// Fake data to mimic the compressed bytes in the zipfile.
std::vector<uint8_t> compressed_bytes;
+ std::vector<uint8_t> data_descriptor;
size_t GetSize() const {
return local_file_header.size() + file_name.size() + extended_field.size() +
- compressed_bytes.size();
+ compressed_bytes.size() + data_descriptor.size();
}
void CopyToOutput(std::vector<uint8_t>* output) const {
@@ -990,6 +991,7 @@
std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output));
+ std::copy(data_descriptor.begin(), data_descriptor.end(), std::back_inserter(*output));
}
};
@@ -1057,7 +1059,7 @@
// Add an entry to the zipfile, construct the corresponding local header and cd entry.
void AddEntry(const std::string& name, const std::vector<uint8_t>& content,
bool uncompressed_size_in_extended, bool compressed_size_in_extended,
- bool local_offset_in_extended) {
+ bool local_offset_in_extended, bool include_data_descriptor = false) {
auto uncompressed_size = static_cast<uint32_t>(content.size());
auto compressed_size = static_cast<uint32_t>(content.size());
uint32_t local_file_header_offset = 0;
@@ -1084,6 +1086,21 @@
ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size,
compressed_size);
ConstructExtendedField(zip64_fields, &local_entry.extended_field);
+ if (include_data_descriptor) {
+ size_t descriptor_size = compressed_size_in_extended ? 24 : 16;
+ local_entry.data_descriptor.resize(descriptor_size);
+ uint8_t* write_ptr = local_entry.data_descriptor.data();
+ EmitUnaligned<uint32_t>(&write_ptr, DataDescriptor::kOptSignature);
+ EmitUnaligned<uint32_t>(&write_ptr, 0 /* crc */);
+ if (compressed_size_in_extended) {
+ EmitUnaligned<uint64_t>(&write_ptr, compressed_size_in_extended);
+ EmitUnaligned<uint64_t>(&write_ptr, uncompressed_size_in_extended);
+ } else {
+ EmitUnaligned<uint32_t>(&write_ptr, compressed_size_in_extended);
+ EmitUnaligned<uint32_t>(&write_ptr, uncompressed_size_in_extended);
+ }
+ }
+
file_entries_.push_back(std::move(local_entry));
if (local_offset_in_extended) {
@@ -1270,3 +1287,38 @@
0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
CloseArchive(handle);
}
+
+TEST_F(Zip64ParseTest, extract) {
+ std::vector<uint8_t> content(200, 'a');
+ AddEntry("a.txt", content, true, true, true);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry));
+
+ VectorWriter writer;
+ ASSERT_EQ(0, ExtractToWriter(handle, &entry, &writer));
+ ASSERT_EQ(content, writer.GetOutput());
+}
+
+TEST_F(Zip64ParseTest, extractWithDataDescriptor) {
+ std::vector<uint8_t> content(300, 'b');
+ AddEntry("a.txt", std::vector<uint8_t>(200, 'a'), true, true, true);
+ AddEntry("b.txt", content, true, true, true, true /* data descriptor */);
+ ConstructEocd();
+ ConstructZipFile();
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(
+ 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
+ ZipEntry entry;
+ ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry));
+
+ VectorWriter writer;
+ ASSERT_EQ(0, ExtractToWriter(handle, &entry, &writer));
+ ASSERT_EQ(content, writer.GetOutput());
+}
diff --git a/libziparchive/zip_writer.cc b/libziparchive/zip_writer.cc
index 67279a6..25b1da4 100644
--- a/libziparchive/zip_writer.cc
+++ b/libziparchive/zip_writer.cc
@@ -475,19 +475,16 @@
if (ShouldUseDataDescriptor()) {
// Some versions of ZIP don't allow STORED data to have a trailing DataDescriptor.
// If this file is not seekable, or if the data is compressed, write a DataDescriptor.
- const uint32_t sig = DataDescriptor::kOptSignature;
- if (fwrite(&sig, sizeof(sig), 1, file_) != 1) {
+ // We haven't supported zip64 format yet. Write both uncompressed size and compressed
+ // size as uint32_t.
+ std::vector<uint32_t> dataDescriptor = {
+ DataDescriptor::kOptSignature, current_file_entry_.crc32,
+ current_file_entry_.compressed_size, current_file_entry_.uncompressed_size};
+ if (fwrite(dataDescriptor.data(), dataDescriptor.size() * sizeof(uint32_t), 1, file_) != 1) {
return HandleError(kIoError);
}
- DataDescriptor dd = {};
- dd.crc32 = current_file_entry_.crc32;
- dd.compressed_size = current_file_entry_.compressed_size;
- dd.uncompressed_size = current_file_entry_.uncompressed_size;
- if (fwrite(&dd, sizeof(dd), 1, file_) != 1) {
- return HandleError(kIoError);
- }
- current_offset_ += sizeof(DataDescriptor::kOptSignature) + sizeof(dd);
+ current_offset_ += sizeof(uint32_t) * dataDescriptor.size();
} else {
// Seek back to the header and rewrite to include the size.
if (fseeko(file_, current_file_entry_.local_file_header_offset, SEEK_SET) != 0) {