blob: f1780f45f78edd96ee173d9ec43012280eda6a81 [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
21#include <assert.h>
22#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070023#include <fcntl.h>
24#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000025#include <limits.h>
26#include <log/log.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000027#include <stdlib.h>
28#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000029#include <unistd.h>
Mark Salyzyn51d562d2014-05-05 14:38:05 -070030#include <utils/Compat.h>
Narayan Kamatheaf98852013-12-11 14:51:51 +000031#include <utils/FileMap.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070032#include <zlib.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000033
34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
35
Mark Salyzyn99ef9912014-03-14 14:26:22 -070036#include "ziparchive/zip_archive.h"
37
Narayan Kamath7462f022013-11-21 13:05:04 +000038// This is for windows. If we don't open a file in binary mode, weirds
39// things will happen.
40#ifndef O_BINARY
41#define O_BINARY 0
42#endif
43
44/*
45 * Zip file constants.
46 */
/*
 * End Of Central Directory (EOCD) record layout.
 *
 * kEOCDLen is the size of the fixed part of the record: the 4-byte signature
 * followed by 18 bytes of fields (the variable-length archive comment comes
 * after it). The backward signature scan starts kEOCDLen bytes before the end
 * of the scan buffer, so this value must cover every fixed field that is read
 * through the offsets below; a smaller value lets the scan and the field
 * reads run past the end of the buffer.
 */
static const uint32_t kEOCDSignature = 0x06054b50;
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8;   // offset to #of entries in file
static const uint32_t kEOCDSize = 12;        // offset to size of the central directory
static const uint32_t kEOCDFileOffset = 16;  // offset to central directory

static const uint32_t kMaxCommentLen = 65535;  // longest possible in ushort
// Largest tail of the file that can contain the start of the EOCD record.
static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);
55
/*
 * Local File Header (LFH): signature, size of the fixed part, and byte
 * offsets of the fields read from it.
 */
static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen = 30; // excluding variable-len fields
static const uint32_t kLFHGPBFlags = 6; // offset to general purpose bit flags
static const uint32_t kLFHCRC = 14; // offset to CRC
static const uint32_t kLFHCompLen = 18; // offset to compressed length
static const uint32_t kLFHUncompLen = 22; // offset to uncompressed length
static const uint32_t kLFHNameLen = 26; // offset to filename length
static const uint32_t kLFHExtraLen = 28; // offset to extra length

/*
 * Central Directory Entry (CDE): signature, size of the fixed part, and byte
 * offsets of the fields read from it. The file name starts kCDELen bytes
 * after the start of the entry.
 */
static const uint32_t kCDESignature = 0x02014b50;
static const uint32_t kCDELen = 46; // excluding variable-len fields
static const uint32_t kCDEMethod = 10; // offset to compression method
static const uint32_t kCDEModWhen = 12; // offset to modification timestamp
static const uint32_t kCDECRC = 16; // offset to entry CRC
static const uint32_t kCDECompLen = 20; // offset to compressed length
static const uint32_t kCDEUncompLen = 24; // offset to uncompressed length
static const uint32_t kCDENameLen = 28; // offset to filename length
static const uint32_t kCDEExtraLen = 30; // offset to extra length
static const uint32_t kCDECommentLen = 32; // offset to comment length
static const uint32_t kCDELocalOffset = 42; // offset to local hdr

/*
 * Data Descriptor (DD). The field offsets are relative to the first byte
 * after the optional signature.
 */
static const uint32_t kDDOptSignature = 0x08074b50; // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;
static const uint32_t kDDMaxLen = 16; // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32 = 0; // offset to crc32
static const uint32_t kDDCompLen = 4; // offset to compressed length
static const uint32_t kDDUncompLen = 8; // offset to uncompressed length

static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD

// NOTE(review): not referenced in the visible part of this file; presumably
// an upper bound on formatted error text -- confirm against its users.
static const uint32_t kMaxErrorLen = 1024;
88
// Human-readable descriptions of the error codes declared below. The entries
// are listed in the same order as the codes: the string at index N describes
// error code -N, and index 0 is the fallback text.
// NOTE(review): the lookup routine is outside this view; presumably it
// indexes this table with the negated error code -- confirm.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// One above the largest error code (all error codes are negative).
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once every matching entry has been visited.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// One below the smallest error code defined above.
static const int32_t kErrorMessageLowerBound = -13;

// NOTE(review): not referenced in the visible part of this file; presumably
// the debug name given to the mapping created when extracting to a file.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000152
/*
 * A Read-only Zip archive.
 *
 * We want "open" and "find entry by name" to be fast operations, and
 * we want to use as little memory as possible. We memory-map the zip
 * central directory, and load a hash table with pointers to the filenames
 * (which aren't null-terminated). The other fields are at a fixed offset
 * from the filename, so we don't need to extract those (but we do need
 * to byte-read and endian-swap them every time we want them).
 *
 * It's possible that somebody has handed us a massive (~1GB) zip archive,
 * so we can't expect to mmap the entire file.
 *
 * To speed comparisons when doing a lookup by name, we could make the mapping
 * "private" (copy-on-write) and null-terminate the filenames after verifying
 * the record structure. However, this requires a private mapping of
 * every page that the Central Directory touches. Easier to tuck a copy
 * of the string length into the hash table entry.
 */
struct ZipArchive {
  /* open Zip archive; closed by CloseArchive() when it is >= 0 */
  int fd;

  /* file offset of the central directory, and the mapping that covers it */
  off64_t directory_offset;
  android::FileMap* directory_map;

  /* number of entries in the Zip archive */
  uint16_t num_entries;

  /*
   * We know how many entries are in the Zip archive, so we can have a
   * fixed-size hash table. We define a load factor of 0.75 and overallocate,
   * so the maximum number of entries can never be higher than
   * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
   *
   * The table is allocated by ParseZipArchive() and freed by CloseArchive().
   * Each used slot points at a file name inside |directory_map|.
   */
  uint32_t hash_table_size;
  ZipEntryName* hash_table;
};
192
193// Returns 0 on success and negative values on failure.
Narayan Kamatheaf98852013-12-11 14:51:51 +0000194static android::FileMap* MapFileSegment(const int fd, const off64_t start,
195 const size_t length, const bool read_only,
196 const char* debug_file_name) {
197 android::FileMap* file_map = new android::FileMap;
198 const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
199 if (!success) {
200 file_map->release();
201 return NULL;
Narayan Kamath7462f022013-11-21 13:05:04 +0000202 }
203
Narayan Kamatheaf98852013-12-11 14:51:51 +0000204 return file_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000205}
206
207static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
208 static const uint32_t kBufSize = 32768;
209 uint8_t buf[kBufSize];
210
211 uint32_t count = 0;
212 uint64_t crc = 0;
Narayan Kamath58aaf462013-12-10 16:47:14 +0000213 while (count < length) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000214 uint32_t remaining = length - count;
215
216 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
217 // value.
218 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
219 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
220
221 if (actual != get_size) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700222 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
Narayan Kamath7462f022013-11-21 13:05:04 +0000223 return kIoError;
224 }
225
226 memcpy(begin + count, buf, get_size);
227 crc = crc32(crc, buf, get_size);
228 count += get_size;
229 }
230
231 *crc_out = crc;
232
233 return 0;
234}
235
/*
 * Returns the smallest power of two that is >= |val|. Values that already are
 * a power of two are returned unchanged; 0 maps to 0.
 *
 * Bit-smearing technique from
 * http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;

  // Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
  for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
252
// Hashes the first |len| bytes of |str| (which need not be NUL terminated)
// with the multiplicative scheme hash = hash * 31 + byte, starting from 0.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i < len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
262
263/*
264 * Convert a ZipEntry to a hash table index, verifying that it's in a
265 * valid range.
266 */
267static int64_t EntryToIndex(const ZipEntryName* hash_table,
268 const uint32_t hash_table_size,
269 const char* name, uint16_t length) {
270 const uint32_t hash = ComputeHash(name, length);
271
272 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
273 uint32_t ent = hash & (hash_table_size - 1);
274 while (hash_table[ent].name != NULL) {
275 if (hash_table[ent].name_length == length &&
276 memcmp(hash_table[ent].name, name, length) == 0) {
277 return ent;
278 }
279
280 ent = (ent + 1) & (hash_table_size - 1);
281 }
282
Colin Crossf4b0b792014-02-06 20:07:15 -0800283 ALOGV("Zip: Unable to find entry %.*s", length, name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000284 return kEntryNotFound;
285}
286
287/*
288 * Add a new entry to the hash table.
289 */
290static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
291 const char* name, uint16_t length) {
292 const uint64_t hash = ComputeHash(name, length);
293 uint32_t ent = hash & (hash_table_size - 1);
294
295 /*
296 * We over-allocated the table, so we're guaranteed to find an empty slot.
297 * Further, we guarantee that the hashtable size is not 0.
298 */
299 while (hash_table[ent].name != NULL) {
300 if (hash_table[ent].name_length == length &&
301 memcmp(hash_table[ent].name, name, length) == 0) {
302 // We've found a duplicate entry. We don't accept it
303 ALOGW("Zip: Found duplicate entry %.*s", length, name);
304 return kDuplicateEntry;
305 }
306 ent = (ent + 1) & (hash_table_size - 1);
307 }
308
309 hash_table[ent].name = name;
310 hash_table[ent].name_length = length;
311 return 0;
312}
313
/*
 * Decodes the 16-bit little-endian value stored at |src| (reads 2 bytes).
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint16_t low = src[0];
  const uint16_t high = src[1];

  return (uint16_t) (low | (high << 8));
}
320
/*
 * Decodes the 32-bit little-endian value stored at |src| (reads 4 bytes).
 *
 * Each byte is widened to uint32_t before it is shifted. Shifting the
 * int-promoted byte directly would move a set top bit of src[3] into the
 * sign bit of a signed int, which is undefined behaviour.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0]) |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
334
335static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
336 ZipArchive* archive, off64_t file_length,
337 uint32_t read_amount, uint8_t* scan_buffer) {
338 const off64_t search_start = file_length - read_amount;
339
340 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700341 ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000342 return kIoError;
343 }
344 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
345 if (actual != (ssize_t) read_amount) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700346 ALOGW("Zip: read %u failed: %s", read_amount, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000347 return kIoError;
348 }
349
350 /*
351 * Scan backward for the EOCD magic. In an archive without a trailing
352 * comment, we'll find it on the first try. (We may want to consider
353 * doing an initial minimal read; if we don't find it, retry with a
354 * second read as above.)
355 */
356 int i;
357 for (i = read_amount - kEOCDLen; i >= 0; i--) {
358 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
359 ALOGV("+++ Found EOCD at buf+%d", i);
360 break;
361 }
362 }
363 if (i < 0) {
364 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
365 return kInvalidFile;
366 }
367
368 const off64_t eocd_offset = search_start + i;
369 const uint8_t* eocd_ptr = scan_buffer + i;
370
371 assert(eocd_offset < file_length);
372
373 /*
374 * Grab the CD offset and size, and the number of entries in the
375 * archive. Verify that they look reasonable. Widen dir_size and
376 * dir_offset to the file offset type.
377 */
378 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
379 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
380 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
381
382 if (dir_offset + dir_size > eocd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700383 ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +0000384 dir_offset, dir_size, eocd_offset);
385 return kInvalidOffset;
386 }
387 if (num_entries == 0) {
388 ALOGW("Zip: empty archive?");
389 return kEmptyArchive;
390 }
391
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700392 ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64,
393 num_entries, dir_size, dir_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000394
395 /*
396 * It all looks good. Create a mapping for the CD, and set the fields
397 * in archive.
398 */
Narayan Kamatheaf98852013-12-11 14:51:51 +0000399 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
400 true /* read only */, debug_file_name);
401 if (map == NULL) {
402 archive->directory_map = NULL;
403 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000404 }
405
Narayan Kamatheaf98852013-12-11 14:51:51 +0000406 archive->directory_map = map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000407 archive->num_entries = num_entries;
408 archive->directory_offset = dir_offset;
409
410 return 0;
411}
412
413/*
414 * Find the zip Central Directory and memory-map it.
415 *
416 * On success, returns 0 after populating fields from the EOCD area:
417 * directory_offset
418 * directory_map
419 * num_entries
420 */
421static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
422 ZipArchive* archive) {
423
424 // Test file length. We use lseek64 to make sure the file
425 // is small enough to be a zip file (Its size must be less than
426 // 0xffffffff bytes).
427 off64_t file_length = lseek64(fd, 0, SEEK_END);
428 if (file_length == -1) {
429 ALOGV("Zip: lseek on fd %d failed", fd);
430 return kInvalidFile;
431 }
432
433 if (file_length > (off64_t) 0xffffffff) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700434 ALOGV("Zip: zip file too long %" PRId64, file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000435 return kInvalidFile;
436 }
437
438 if (file_length < (int64_t) kEOCDLen) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700439 ALOGV("Zip: length %" PRId64 " is too small to be zip", file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000440 return kInvalidFile;
441 }
442
443 /*
444 * Perform the traditional EOCD snipe hunt.
445 *
446 * We're searching for the End of Central Directory magic number,
447 * which appears at the start of the EOCD block. It's followed by
448 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
449 * need to read the last part of the file into a buffer, dig through
450 * it to find the magic number, parse some values out, and use those
451 * to determine the extent of the CD.
452 *
453 * We start by pulling in the last part of the file.
454 */
455 uint32_t read_amount = kMaxEOCDSearch;
456 if (file_length < (off64_t) read_amount) {
457 read_amount = file_length;
458 }
459
460 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
461 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
462 file_length, read_amount, scan_buffer);
463
464 free(scan_buffer);
465 return result;
466}
467
468/*
469 * Parses the Zip archive's Central Directory. Allocates and populates the
470 * hash table.
471 *
472 * Returns 0 on success.
473 */
474static int32_t ParseZipArchive(ZipArchive* archive) {
475 int32_t result = -1;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000476 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
477 size_t cd_length = archive->directory_map->getDataLength();
Narayan Kamath7462f022013-11-21 13:05:04 +0000478 uint16_t num_entries = archive->num_entries;
479
480 /*
481 * Create hash table. We have a minimum 75% load factor, possibly as
482 * low as 50% after we round off to a power of 2. There must be at
483 * least one unused entry to avoid an infinite loop during creation.
484 */
485 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
486 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
487 sizeof(ZipEntryName));
488
489 /*
490 * Walk through the central directory, adding entries to the hash
491 * table and verifying values.
492 */
493 const uint8_t* ptr = cd_ptr;
494 for (uint16_t i = 0; i < num_entries; i++) {
495 if (get4LE(ptr) != kCDESignature) {
496 ALOGW("Zip: missed a central dir sig (at %d)", i);
497 goto bail;
498 }
499
500 if (ptr + kCDELen > cd_ptr + cd_length) {
501 ALOGW("Zip: ran off the end (at %d)", i);
502 goto bail;
503 }
504
505 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
506 if (local_header_offset >= archive->directory_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700507 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %d", local_header_offset, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000508 goto bail;
509 }
510
511 const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
512 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
513 const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
514
515 /* add the CDE filename to the hash table */
516 const int add_result = AddToHash(archive->hash_table,
517 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
518 if (add_result) {
519 ALOGW("Zip: Error adding entry to hash table %d", add_result);
520 result = add_result;
521 goto bail;
522 }
523
524 ptr += kCDELen + file_name_length + extra_length + comment_length;
525 if ((size_t)(ptr - cd_ptr) > cd_length) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700526 ALOGW("Zip: bad CD advance (%zu vs %zu) at entry %d",
527 (size_t) (ptr - cd_ptr), cd_length, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000528 goto bail;
529 }
530 }
531 ALOGV("+++ zip good scan %d entries", num_entries);
532
533 result = 0;
534
535bail:
536 return result;
537}
538
539static int32_t OpenArchiveInternal(ZipArchive* archive,
540 const char* debug_file_name) {
541 int32_t result = -1;
542 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
543 return result;
544 }
545
546 if ((result = ParseZipArchive(archive))) {
547 return result;
548 }
549
550 return 0;
551}
552
553int32_t OpenArchiveFd(int fd, const char* debug_file_name,
554 ZipArchiveHandle* handle) {
555 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
556 memset(archive, 0, sizeof(*archive));
557 *handle = archive;
558
559 archive->fd = fd;
560
561 return OpenArchiveInternal(archive, debug_file_name);
562}
563
564int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
565 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
566 memset(archive, 0, sizeof(*archive));
567 *handle = archive;
568
569 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
570 if (fd < 0) {
571 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
572 return kIoError;
573 } else {
574 archive->fd = fd;
575 }
576
577 return OpenArchiveInternal(archive, fileName);
578}
579
580/*
581 * Close a ZipArchive, closing the file and freeing the contents.
582 */
583void CloseArchive(ZipArchiveHandle handle) {
584 ZipArchive* archive = (ZipArchive*) handle;
585 ALOGV("Closing archive %p", archive);
586
587 if (archive->fd >= 0) {
588 close(archive->fd);
589 }
590
Narayan Kamatheaf98852013-12-11 14:51:51 +0000591 if (archive->directory_map != NULL) {
592 archive->directory_map->release();
593 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000594 free(archive->hash_table);
Mathieu Chartier5f98b122014-03-04 17:39:38 -0800595 free(archive);
Narayan Kamath7462f022013-11-21 13:05:04 +0000596}
597
598static int32_t UpdateEntryFromDataDescriptor(int fd,
599 ZipEntry *entry) {
600 uint8_t ddBuf[kDDMaxLen];
601 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
602 if (actual != sizeof(ddBuf)) {
603 return kIoError;
604 }
605
606 const uint32_t ddSignature = get4LE(ddBuf);
607 uint16_t ddOffset = 0;
608 if (ddSignature == kDDOptSignature) {
609 ddOffset = 4;
610 }
611
612 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
613 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
614 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
615
616 return 0;
617}
618
619// Attempts to read |len| bytes into |buf| at offset |off|.
620//
621// This method uses pread64 on platforms that support it and
622// lseek64 + read on platforms that don't. This implies that
623// callers should not rely on the |fd| offset being incremented
624// as a side effect of this call.
625static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
626 off64_t off) {
627#ifdef HAVE_PREAD
628 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
629#else
630 // The only supported platform that doesn't support pread at the moment
631 // is Windows. Only recent versions of windows support unix like forks,
632 // and even there the semantics are quite different.
633 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700634 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000635 return kIoError;
636 }
637
638 return TEMP_FAILURE_RETRY(read(fd, buf, len));
639#endif // HAVE_PREAD
640}
641
// Fills |data| for the entry stored in hash table slot |ent| of |archive|.
//
// The entry's attributes are taken from its central directory record and are
// then cross-checked against the local file header read from archive->fd.
//
// Returns 0 on success with |data| fully populated (including the offset of
// the entry's data). Returns kInvalidOffset, kIoError or
// kInconsistentInformation when a check fails; |data| may have been partially
// written in that case.
static int32_t FindEntry(const ZipArchive* archive, const int ent,
                         ZipEntry* data) {
  const uint16_t nameLen = archive->hash_table[ent].name_length;
  const char* name = archive->hash_table[ent].name;

  // Recover the start of the central directory entry from the filename
  // pointer. The filename is the first entry past the fixed-size data,
  // so we can just subtract back from that.
  const unsigned char* ptr = (const unsigned char*) name;
  ptr -= kCDELen;

  // This is the base of our mmapped region, we have to sanity check that
  // the name that's in the hash table is a pointer to a location within
  // this mapped region.
  const unsigned char* base_ptr = (const unsigned char*)
      archive->directory_map->getDataPtr();
  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
    ALOGW("Zip: Invalid entry pointer");
    return kInvalidOffset;
  }

  // The offset of the start of the central directory in the zipfile.
  // We keep this lying around so that we can sanity check all our lengths
  // and our per-file structures.
  const off64_t cd_offset = archive->directory_offset;

  // Fill out the compression method, modification time, crc32
  // and other interesting attributes from the central directory. These
  // will later be compared against values from the local file header.
  data->method = get2LE(ptr + kCDEMethod);
  data->mod_time = get4LE(ptr + kCDEModWhen);
  data->crc32 = get4LE(ptr + kCDECRC);
  data->compressed_length = get4LE(ptr + kCDECompLen);
  data->uncompressed_length = get4LE(ptr + kCDEUncompLen);

  // Figure out the local header offset from the central directory. The
  // actual file data will begin after the local header and the name /
  // extra comments.
  const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
  if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
    ALOGW("Zip: bad local hdr offset in zip");
    return kInvalidOffset;
  }

  // Read the fixed-size part of the local file header.
  uint8_t lfh_buf[kLFHLen];
  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
                                 local_header_offset);
  if (actual != sizeof(lfh_buf)) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, local_header_offset);
    return kIoError;
  }

  if (get4LE(lfh_buf) != kLFHSignature) {
    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
        local_header_offset);
    return kInvalidOffset;
  }

  // Paranoia: Match the values specified in the local file header
  // to those specified in the central directory.
  const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
  const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
  const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);

  // The header's sizes and crc32 are only compared when the data descriptor
  // flag is clear. When it is set, the central directory values are kept
  // as-is and the entry is merely marked as having a data descriptor.
  if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
    const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
    const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
    const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);

    data->has_data_descriptor = 0;
    if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
        || data->crc32 != lfhCrc) {
      ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
        data->compressed_length, data->uncompressed_length, data->crc32,
        lfhCompLen, lfhUncompLen, lfhCrc);
      return kInconsistentInformation;
    }
  } else {
    data->has_data_descriptor = 1;
  }

  // Check that the local file header name matches the declared
  // name in the central directory.
  if (lfhNameLen == nameLen) {
    const off64_t name_offset = local_header_offset + kLFHLen;
    if (name_offset + lfhNameLen >= cd_offset) {
      ALOGW("Zip: Invalid declared length");
      return kInvalidOffset;
    }

    // Temporary copy of the local header's name; freed on every path below.
    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
                                  name_offset);

    if (actual != nameLen) {
      ALOGW("Zip: failed reading lfh name from offset %" PRId64, name_offset);
      free(name_buf);
      return kIoError;
    }

    if (memcmp(name, name_buf, nameLen)) {
      free(name_buf);
      return kInconsistentInformation;
    }

    free(name_buf);
  } else {
    ALOGW("Zip: lfh name did not match central directory.");
    return kInconsistentInformation;
  }

  // The entry's data follows the fixed header, the name and the extra field.
  // It has to start, and end, at or before the central directory.
  const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
  if (data_offset > cd_offset) {
    ALOGW("Zip: bad data offset %" PRId64 " in zip", data_offset);
    return kInvalidOffset;
  }

  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %zd > %" PRId64 ")",
      data_offset, data->compressed_length, cd_offset);
    return kInvalidOffset;
  }

  // For stored entries the uncompressed length is checked against the
  // central directory offset as well.
  if (data->method == kCompressStored &&
    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %d > %" PRId64 ")",
       data_offset, data->uncompressed_length, cd_offset);
     return kInvalidOffset;
  }

  data->offset = data_offset;
  return 0;
}
775
// State of one iteration over an archive's entries; created by
// StartIteration() and advanced by Next().
struct IterationHandle {
  // Hash table slot at which the next call to Next() resumes scanning.
  uint32_t position;
  // Optional name prefix used to filter entries (NULL means no filtering).
  // The pointer passed to StartIteration() is stored as-is; the string is
  // not copied.
  const char* prefix;
  // strlen() of |prefix|; set by StartIteration() when |prefix| is not NULL.
  uint16_t prefix_len;
  // The archive being iterated.
  ZipArchive* archive;
};
782
783int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
784 ZipArchive* archive = (ZipArchive *) handle;
785
786 if (archive == NULL || archive->hash_table == NULL) {
787 ALOGW("Zip: Invalid ZipArchiveHandle");
788 return kInvalidHandle;
789 }
790
791 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
792 cookie->position = 0;
793 cookie->prefix = prefix;
794 cookie->archive = archive;
795 if (prefix != NULL) {
796 cookie->prefix_len = strlen(prefix);
797 }
798
799 *cookie_ptr = cookie ;
800 return 0;
801}
802
803int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
804 ZipEntry* data) {
805 const ZipArchive* archive = (ZipArchive*) handle;
806 const int nameLen = strlen(entryName);
807 if (nameLen == 0 || nameLen > 65535) {
808 ALOGW("Zip: Invalid filename %s", entryName);
809 return kInvalidEntryName;
810 }
811
812 const int64_t ent = EntryToIndex(archive->hash_table,
813 archive->hash_table_size, entryName, nameLen);
814
815 if (ent < 0) {
Narayan Kamatha1ff8012013-12-31 10:27:59 +0000816 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000817 return ent;
818 }
819
820 return FindEntry(archive, ent, data);
821}
822
823int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
824 IterationHandle* handle = (IterationHandle *) cookie;
825 if (handle == NULL) {
826 return kInvalidHandle;
827 }
828
829 ZipArchive* archive = handle->archive;
830 if (archive == NULL || archive->hash_table == NULL) {
831 ALOGW("Zip: Invalid ZipArchiveHandle");
832 return kInvalidHandle;
833 }
834
835 const uint32_t currentOffset = handle->position;
836 const uint32_t hash_table_length = archive->hash_table_size;
837 const ZipEntryName *hash_table = archive->hash_table;
838
839 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
840 if (hash_table[i].name != NULL &&
841 (handle->prefix == NULL ||
842 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
843 handle->position = (i + 1);
844 const int error = FindEntry(archive, i, data);
845 if (!error) {
846 name->name = hash_table[i].name;
847 name->name_length = hash_table[i].name_length;
848 }
849
850 return error;
851 }
852 }
853
854 handle->position = 0;
855 return kIterationEnd;
856}
857
858static int32_t InflateToFile(int fd, const ZipEntry* entry,
859 uint8_t* begin, uint32_t length,
860 uint64_t* crc_out) {
861 int32_t result = -1;
862 const uint32_t kBufSize = 32768;
863 uint8_t read_buf[kBufSize];
864 uint8_t write_buf[kBufSize];
865 z_stream zstream;
866 int zerr;
867
868 /*
869 * Initialize the zlib stream struct.
870 */
871 memset(&zstream, 0, sizeof(zstream));
872 zstream.zalloc = Z_NULL;
873 zstream.zfree = Z_NULL;
874 zstream.opaque = Z_NULL;
875 zstream.next_in = NULL;
876 zstream.avail_in = 0;
877 zstream.next_out = (Bytef*) write_buf;
878 zstream.avail_out = kBufSize;
879 zstream.data_type = Z_UNKNOWN;
880
881 /*
882 * Use the undocumented "negative window bits" feature to tell zlib
883 * that there's no zlib header waiting for it.
884 */
885 zerr = inflateInit2(&zstream, -MAX_WBITS);
886 if (zerr != Z_OK) {
887 if (zerr == Z_VERSION_ERROR) {
888 ALOGE("Installed zlib is not compatible with linked version (%s)",
889 ZLIB_VERSION);
890 } else {
891 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
892 }
893
894 return kZlibError;
895 }
896
897 const uint32_t uncompressed_length = entry->uncompressed_length;
898
899 uint32_t compressed_length = entry->compressed_length;
900 uint32_t write_count = 0;
901 do {
902 /* read as much as we can */
903 if (zstream.avail_in == 0) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700904 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
905 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
Narayan Kamath7462f022013-11-21 13:05:04 +0000906 if (actual != getSize) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700907 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
Narayan Kamath7462f022013-11-21 13:05:04 +0000908 result = kIoError;
909 goto z_bail;
910 }
911
912 compressed_length -= getSize;
913
914 zstream.next_in = read_buf;
915 zstream.avail_in = getSize;
916 }
917
918 /* uncompress the data */
919 zerr = inflate(&zstream, Z_NO_FLUSH);
920 if (zerr != Z_OK && zerr != Z_STREAM_END) {
921 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
922 zerr, zstream.next_in, zstream.avail_in,
923 zstream.next_out, zstream.avail_out);
924 result = kZlibError;
925 goto z_bail;
926 }
927
928 /* write when we're full or when we're done */
929 if (zstream.avail_out == 0 ||
930 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
931 const size_t write_size = zstream.next_out - write_buf;
932 // The file might have declared a bogus length.
933 if (write_size + write_count > length) {
934 goto z_bail;
935 }
936 memcpy(begin + write_count, write_buf, write_size);
937 write_count += write_size;
938
939 zstream.next_out = write_buf;
940 zstream.avail_out = kBufSize;
941 }
942 } while (zerr == Z_OK);
943
944 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
945
946 // stream.adler holds the crc32 value for such streams.
947 *crc_out = zstream.adler;
948
949 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700950 ALOGW("Zip: size mismatch on inflated file (%ld vs %u)",
Narayan Kamath7462f022013-11-21 13:05:04 +0000951 zstream.total_out, uncompressed_length);
952 result = kInconsistentInformation;
953 goto z_bail;
954 }
955
956 result = 0;
957
958z_bail:
959 inflateEnd(&zstream); /* free up any allocated structures */
960
961 return result;
962}
963
964int32_t ExtractToMemory(ZipArchiveHandle handle,
965 ZipEntry* entry, uint8_t* begin, uint32_t size) {
966 ZipArchive* archive = (ZipArchive*) handle;
967 const uint16_t method = entry->method;
968 off64_t data_offset = entry->offset;
969
970 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700971 ALOGW("Zip: lseek to data at %" PRId64 " failed", data_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000972 return kIoError;
973 }
974
975 // this should default to kUnknownCompressionMethod.
976 int32_t return_value = -1;
977 uint64_t crc = 0;
978 if (method == kCompressStored) {
979 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
980 } else if (method == kCompressDeflated) {
981 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
982 }
983
984 if (!return_value && entry->has_data_descriptor) {
985 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
986 if (return_value) {
987 return return_value;
988 }
989 }
990
991 // TODO: Fix this check by passing the right flags to inflate2 so that
992 // it calculates the CRC for us.
993 if (entry->crc32 != crc && false) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700994 ALOGW("Zip: crc mismatch: expected %u, was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +0000995 return kInconsistentInformation;
996 }
997
998 return return_value;
999}
1000
1001int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1002 ZipEntry* entry, int fd) {
1003 const int32_t declared_length = entry->uncompressed_length;
1004
Narayan Kamath00a258c2013-12-13 16:06:19 +00001005 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1006 if (current_offset == -1) {
1007 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1008 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +00001009 return kIoError;
1010 }
1011
Narayan Kamath00a258c2013-12-13 16:06:19 +00001012 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1013 if (result == -1) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -07001014 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1015 declared_length + current_offset, strerror(errno));
Narayan Kamath00a258c2013-12-13 16:06:19 +00001016 return kIoError;
1017 }
1018
Narayan Kamath48953a12014-01-24 12:32:39 +00001019 // Don't attempt to map a region of length 0. We still need the
1020 // ftruncate() though, since the API guarantees that we will truncate
1021 // the file to the end of the uncompressed output.
1022 if (declared_length == 0) {
1023 return 0;
1024 }
1025
Narayan Kamath00a258c2013-12-13 16:06:19 +00001026 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length,
Narayan Kamatheaf98852013-12-11 14:51:51 +00001027 false, kTempMappingFileName);
1028 if (map == NULL) {
1029 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +00001030 }
1031
Narayan Kamatheaf98852013-12-11 14:51:51 +00001032 const int32_t error = ExtractToMemory(handle, entry,
1033 reinterpret_cast<uint8_t*>(map->getDataPtr()),
1034 map->getDataLength());
1035 map->release();
Narayan Kamath7462f022013-11-21 13:05:04 +00001036 return error;
1037}
1038
1039const char* ErrorCodeString(int32_t error_code) {
1040 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1041 return kErrorMessages[error_code * -1];
1042 }
1043
1044 return kErrorMessages[0];
1045}
1046
1047int GetFileDescriptor(const ZipArchiveHandle handle) {
1048 return ((ZipArchive*) handle)->fd;
1049}
1050