Merge "Add definition for zip64 struct"
diff --git a/libziparchive/include/ziparchive/zip_archive.h b/libziparchive/include/ziparchive/zip_archive.h
index 098a9cb..a17127d 100644
--- a/libziparchive/include/ziparchive/zip_archive.h
+++ b/libziparchive/include/ziparchive/zip_archive.h
@@ -145,7 +145,7 @@
   /** The size in bytes of the archive itself. Used by zipinfo. */
   off64_t archive_size;
   /** The number of entries in the archive. */
-  size_t entry_count;
+  uint64_t entry_count;
 };
 
 /**
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index aa8bafc..afbc5d8 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -65,6 +65,10 @@
 // The maximum number of bytes to scan backwards for the EOCD start.
 static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
 
+// Set a reasonable cap (256 GiB) for the zip file size. So the data is always valid when
+// we parse the fields in cd or local headers as 64 bits signed integers.
+static constexpr uint64_t kMaxFileLength = 256 * static_cast<uint64_t>(1u << 30u);
+
 /*
  * A Read-only Zip archive.
  *
@@ -125,12 +129,27 @@
   }
 }
 
-static int32_t MapCentralDirectory0(const char* debug_file_name, ZipArchive* archive,
-                                    off64_t file_length, uint32_t read_amount,
-                                    uint8_t* scan_buffer) {
+struct CentralDirectoryInfo {
+  uint64_t num_records;
+  // The size of the central directory (in bytes).
+  uint64_t cd_size;
+  // The offset of the start of the central directory, relative
+  // to the start of the file.
+  uint64_t cd_start_offset;
+};
+
+static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
+  ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
+  return kInvalidFile;
+}
+
+static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
+                                         off64_t file_length, uint32_t read_amount,
+                                         CentralDirectoryInfo* cdInfo) {
+  std::vector<uint8_t> scan_buffer(read_amount);
   const off64_t search_start = file_length - read_amount;
 
-  if (!archive->mapped_zip.ReadAtOffset(scan_buffer, read_amount, search_start)) {
+  if (!archive->mapped_zip.ReadAtOffset(scan_buffer.data(), read_amount, search_start)) {
     ALOGE("Zip: read %" PRId64 " from offset %" PRId64 " failed", static_cast<int64_t>(read_amount),
           static_cast<int64_t>(search_start));
     return kIoError;
@@ -159,7 +178,7 @@
   }
 
   const off64_t eocd_offset = search_start + i;
-  const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
+  auto eocd = reinterpret_cast<const EocdRecord*>(scan_buffer.data() + i);
   /*
    * Verify that there's no trailing space at the end of the central directory
    * and its comment.
@@ -171,6 +190,13 @@
     return kInvalidFile;
   }
 
+  // One of the field is 0xFFFFFFFF, look for the zip64 EOCD instead.
+  if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
+    ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
+          eocd->cd_size, eocd->cd_start_offset);
+    return FindCentralDirectoryInfoForZip64(cdInfo);
+  }
+
   /*
    * Grab the CD offset and size, and the number of entries in the
    * archive and verify that they look reasonable.
@@ -180,47 +206,29 @@
           eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
     return kInvalidOffset;
   }
-  if (eocd->num_records == 0) {
-#if defined(__ANDROID__)
-    ALOGW("Zip: empty archive?");
-#endif
-    return kEmptyArchive;
-  }
 
-  ALOGV("+++ num_entries=%" PRIu32 " dir_size=%" PRIu32 " dir_offset=%" PRIu32, eocd->num_records,
-        eocd->cd_size, eocd->cd_start_offset);
-
-  // It all looks good.  Create a mapping for the CD, and set the fields
-  // in archive.
-  if (!archive->InitializeCentralDirectory(static_cast<off64_t>(eocd->cd_start_offset),
-                                           static_cast<size_t>(eocd->cd_size))) {
-    return kMmapFailed;
-  }
-
-  archive->num_entries = eocd->num_records;
-  archive->directory_offset = eocd->cd_start_offset;
-
-  return 0;
+  *cdInfo = {.num_records = eocd->num_records,
+             .cd_size = eocd->cd_size,
+             .cd_start_offset = eocd->cd_start_offset};
+  return kSuccess;
 }
 
 /*
  * Find the zip Central Directory and memory-map it.
  *
- * On success, returns 0 after populating fields from the EOCD area:
+ * On success, returns kSuccess after populating fields from the EOCD area:
  *   directory_offset
  *   directory_ptr
  *   num_entries
  */
-static int32_t MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) {
-  // Test file length. We use lseek64 to make sure the file
-  // is small enough to be a zip file (Its size must be less than
-  // 0xffffffff bytes).
+static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) {
+  // Test file length. We use lseek64 to make sure the file is small enough to be a zip file.
   off64_t file_length = archive->mapped_zip.GetFileLength();
   if (file_length == -1) {
     return kInvalidFile;
   }
 
-  if (file_length > static_cast<off64_t>(0xffffffff)) {
+  if (file_length > kMaxFileLength) {
     ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
     return kInvalidFile;
   }
@@ -247,10 +255,39 @@
     read_amount = static_cast<uint32_t>(file_length);
   }
 
-  std::vector<uint8_t> scan_buffer(read_amount);
-  int32_t result =
-      MapCentralDirectory0(debug_file_name, archive, file_length, read_amount, scan_buffer.data());
-  return result;
+  CentralDirectoryInfo cdInfo = {};
+  if (auto result =
+          FindCentralDirectoryInfo(debug_file_name, archive, file_length, read_amount, &cdInfo);
+      result != kSuccess) {
+    return result;
+  }
+
+  if (cdInfo.num_records == 0) {
+#if defined(__ANDROID__)
+    ALOGW("Zip: empty archive?");
+#endif
+    return kEmptyArchive;
+  }
+
+  if (cdInfo.cd_size >= SIZE_MAX) {
+    ALOGW("Zip: The size of central directory doesn't fit in range of size_t: %" PRIu64,
+          cdInfo.cd_size);
+    return kInvalidFile;
+  }
+
+  ALOGV("+++ num_entries=%" PRIu64 " dir_size=%" PRIu64 " dir_offset=%" PRIu64, cdInfo.num_records,
+        cdInfo.cd_size, cdInfo.cd_start_offset);
+
+  // It all looks good.  Create a mapping for the CD, and set the fields in archive.
+  if (!archive->InitializeCentralDirectory(static_cast<off64_t>(cdInfo.cd_start_offset),
+                                           static_cast<size_t>(cdInfo.cd_size))) {
+    return kMmapFailed;
+  }
+
+  archive->num_entries = cdInfo.num_records;
+  archive->directory_offset = cdInfo.cd_start_offset;
+
+  return kSuccess;
 }
 
 /*
@@ -262,13 +299,12 @@
 static int32_t ParseZipArchive(ZipArchive* archive) {
   const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
   const size_t cd_length = archive->central_directory.GetMapLength();
-  const uint16_t num_entries = archive->num_entries;
+  const uint64_t num_entries = archive->num_entries;
 
-  // TODO(xunchang) parse the zip64 Eocd
-  if (num_entries > UINT16_MAX) {
-    archive->cd_entry_map = CdEntryMapZip64::Create();
+  if (num_entries <= UINT16_MAX) {
+    archive->cd_entry_map = CdEntryMapZip32::Create(static_cast<uint16_t>(num_entries));
   } else {
-    archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
+    archive->cd_entry_map = CdEntryMapZip64::Create();
   }
   if (archive->cd_entry_map == nullptr) {
     return kAllocationFailed;
@@ -280,9 +316,9 @@
    */
   const uint8_t* const cd_end = cd_ptr + cd_length;
   const uint8_t* ptr = cd_ptr;
-  for (uint16_t i = 0; i < num_entries; i++) {
+  for (uint64_t i = 0; i < num_entries; i++) {
     if (ptr > cd_end - sizeof(CentralDirectoryRecord)) {
-      ALOGW("Zip: ran off the end (item #%" PRIu16 ", %zu bytes of central directory)", i,
+      ALOGW("Zip: ran off the end (item #%" PRIu64 ", %zu bytes of central directory)", i,
             cd_length);
 #if defined(__ANDROID__)
       android_errorWriteLog(0x534e4554, "36392138");
@@ -292,14 +328,7 @@
 
     const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
     if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
-      ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
-      return kInvalidFile;
-    }
-
-    const off64_t local_header_offset = cdr->local_file_header_offset;
-    if (local_header_offset >= archive->directory_offset) {
-      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16,
-            static_cast<int64_t>(local_header_offset), i);
+      ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
       return kInvalidFile;
     }
 
@@ -308,15 +337,37 @@
     const uint16_t comment_length = cdr->comment_length;
     const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
 
-    if (file_name + file_name_length > cd_end) {
-      ALOGW("Zip: file name for entry %" PRIu16
+    if (file_name_length >= cd_length || file_name > cd_end - file_name_length) {
+      ALOGW("Zip: file name for entry %" PRIu64
             " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
             i, file_name_length, cd_length);
       return kInvalidEntryName;
     }
+
+    const uint8_t* extra_field = file_name + file_name_length;
+    if (extra_length >= cd_length || extra_field > cd_end - extra_length) {
+      ALOGW("Zip: extra field for entry %" PRIu64
+            " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
+            i, extra_length, cd_length);
+      return kInvalidFile;
+    }
+
+    off64_t local_header_offset = cdr->local_file_header_offset;
+    if (local_header_offset == UINT32_MAX) {
+      // TODO(xunchang) parse the zip64 eocd
+      ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
+      return kInvalidFile;
+    }
+
+    if (local_header_offset >= archive->directory_offset) {
+      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu64,
+            static_cast<int64_t>(local_header_offset), i);
+      return kInvalidFile;
+    }
+
     // Check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters.
     if (!IsValidEntryName(file_name, file_name_length)) {
-      ALOGW("Zip: invalid file name at entry %" PRIu16, i);
+      ALOGW("Zip: invalid file name at entry %" PRIu64, i);
       return kInvalidEntryName;
     }
 
@@ -331,7 +382,7 @@
 
     ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
     if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
-      ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16, ptr - cd_ptr, cd_length, i);
+      ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu64, ptr - cd_ptr, cd_length, i);
       return kInvalidFile;
     }
   }
@@ -351,14 +402,14 @@
     return kInvalidFile;
   }
 
-  ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
+  ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
 
   return 0;
 }
 
 static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
   int32_t result = MapCentralDirectory(debug_file_name, archive);
-  return result != 0 ? result : ParseZipArchive(archive);
+  return result != kSuccess ? result : ParseZipArchive(archive);
 }
 
 int32_t OpenArchiveFd(int fd, const char* debug_file_name, ZipArchiveHandle* handle,
@@ -489,7 +540,15 @@
   // Figure out the local header offset from the central directory. The
   // actual file data will begin after the local header and the name /
   // extra comments.
-  const off64_t local_header_offset = cdr->local_file_header_offset;
+  off64_t local_header_offset = cdr->local_file_header_offset;
+  // One of the info field is UINT32_MAX, try to parse the real value in the zip64 extended info in
+  // the extra field.
+  if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
+      cdr->local_file_header_offset == UINT32_MAX) {
+    ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
+    return kInvalidFile;
+  }
+
   if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
     ALOGW("Zip: bad local hdr offset in zip");
     return kInvalidOffset;
diff --git a/libziparchive/zip_archive_common.h b/libziparchive/zip_archive_common.h
index 8b99bde..a92d4d2 100644
--- a/libziparchive/zip_archive_common.h
+++ b/libziparchive/zip_archive_common.h
@@ -21,6 +21,8 @@
 
 #include <inttypes.h>
 
+#include <optional>
+
 // The "end of central directory" (EOCD) record. Each archive
 // contains exactly once such record which appears at the end of
 // the archive. It contains archive wide information like the
@@ -173,6 +175,89 @@
   DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
 } __attribute__((packed));
 
+// The zip64 end of central directory locator helps to find the zip64 EOCD.
+struct Zip64EocdLocator {
+  static constexpr uint32_t kSignature = 0x07064b50;
+
+  // The signature of zip64 eocd locator, must be |kSignature|
+  uint32_t locator_signature;
+  // The start disk of the zip64 eocd. This implementation assumes that each
+  // archive spans a single disk only.
+  uint32_t eocd_start_disk;
+  // The offset offset of the zip64 end of central directory record.
+  uint64_t zip64_eocd_offset;
+  // The total number of disks. This implementation assumes that each archive
+  // spans a single disk only.
+  uint32_t num_of_disks;
+
+ private:
+  Zip64EocdLocator() = default;
+  DISALLOW_COPY_AND_ASSIGN(Zip64EocdLocator);
+} __attribute__((packed));
+
+// The optional zip64 EOCD. If one of the fields in the end of central directory
+// record is too small to hold required data, the field SHOULD be  set to -1
+// (0xFFFF or 0xFFFFFFFF) and the ZIP64 format record SHOULD be created.
+struct Zip64EocdRecord {
+  static constexpr uint32_t kSignature = 0x06064b50;
+
+  // The signature of zip64 eocd record, must be |kSignature|
+  uint32_t record_signature;
+  // Size of zip64 end of central directory record. It SHOULD be the size of the
+  // remaining record and SHOULD NOT include the leading 12 bytes.
+  uint64_t record_size;
+  // The version of the tool that make this archive.
+  uint16_t version_made_by;
+  // Tool version needed to extract this archive.
+  uint16_t version_needed;
+  // Number of this disk.
+  uint32_t disk_num;
+  // Number of the disk with the start of the central directory.
+  uint32_t cd_start_disk;
+  // Total number of entries in the central directory on this disk.
+  // This implementation assumes that each archive spans a single
+  // disk only. i.e, that num_records_on_disk == num_records.
+  uint64_t num_records_on_disk;
+  // The total number of central directory records.
+  uint64_t num_records;
+  // The size of the central directory in bytes.
+  uint64_t cd_size;
+  // The offset of the start of the central directory, relative to the start of
+  // the file.
+  uint64_t cd_start_offset;
+
+ private:
+  Zip64EocdRecord() = default;
+  DISALLOW_COPY_AND_ASSIGN(Zip64EocdRecord);
+} __attribute__((packed));
+
+// The possible contents of the Zip64 Extended Information Extra Field. It may appear in
+// the 'extra' field of a central directory record or local file header. The order of
+// the fields in the zip64 extended information record is fixed, but the fields MUST
+// only appear if the corresponding local or central directory record field is set to
+// 0xFFFF or 0xFFFFFFFF. And this entry in the Local header MUST include BOTH original
+// and compressed file size fields.
+struct Zip64ExtendedInfo {
+  static constexpr uint16_t kHeaderId = 0x0001;
+  // The header tag for this 'extra' block, should be |kHeaderId|.
+  uint16_t header_id;
+  // The size in bytes of the remaining data (excluding the top 4 bytes).
+  uint16_t data_size;
+  // Size in bytes of the uncompressed file.
+  std::optional<uint64_t> uncompressed_file_size;
+  // Size in bytes of the compressed file.
+  std::optional<uint64_t> compressed_file_size;
+  // Local file header offset relative to the start of the zip file.
+  std::optional<uint64_t> local_header_offset;
+
+  // This implementation assumes that each archive spans a single disk only. So
+  // the disk_number is not used.
+  // uint32_t disk_num;
+ private:
+  Zip64ExtendedInfo() = default;
+  DISALLOW_COPY_AND_ASSIGN(Zip64ExtendedInfo);
+};
+
 // mask value that signifies that the entry has a DD
 static const uint32_t kGPBDDFlagMask = 0x0008;
 
diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h
index 3509b89..5f5232f 100644
--- a/libziparchive/zip_archive_private.h
+++ b/libziparchive/zip_archive_private.h
@@ -95,7 +95,7 @@
   std::unique_ptr<android::base::MappedFile> directory_map;
 
   // number of entries in the Zip archive
-  uint16_t num_entries;
+  uint64_t num_entries;
   std::unique_ptr<CdEntryMapInterface> cd_entry_map;
 
   ZipArchive(MappedZipFile&& map, bool assume_ownership);
diff --git a/libziparchive/ziptool.cpp b/libziparchive/ziptool.cpp
index dd42e90..f345ffc 100644
--- a/libziparchive/ziptool.cpp
+++ b/libziparchive/ziptool.cpp
@@ -133,8 +133,8 @@
     if (!flag_1 && includes.empty() && excludes.empty()) {
       ZipArchiveInfo info{GetArchiveInfo(zah)};
       printf("Archive:  %s\n", archive_name);
-      printf("Zip file size: %" PRId64 " bytes, number of entries: %zu\n", info.archive_size,
-             info.entry_count);
+      printf("Zip file size: %" PRId64 " bytes, number of entries: %" PRIu64 "\n",
+             info.archive_size, info.entry_count);
     }
   }
 }