| Colin Cross | 163d5a9 | 2012-03-22 18:46:44 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2007 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | // | 
|  | 18 | // Read-only access to Zip archives, with minimal heap allocation. | 
|  | 19 | // | 
|  | 20 | #define LOG_TAG "zipro" | 
|  | 21 | //#define LOG_NDEBUG 0 | 
|  | 22 | #include <utils/Log.h> | 
|  | 23 | #include <utils/ZipFileRO.h> | 
|  | 24 | #include <utils/misc.h> | 
|  | 25 | #include <utils/threads.h> | 
|  | 26 |  | 
|  | 27 | #include <zlib.h> | 
|  | 28 |  | 
|  | 29 | #include <string.h> | 
|  | 30 | #include <fcntl.h> | 
|  | 31 | #include <errno.h> | 
|  | 32 | #include <assert.h> | 
|  | 33 | #include <unistd.h> | 
|  | 34 |  | 
|  | 35 | #if HAVE_PRINTF_ZD | 
|  | 36 | #  define ZD "%zd" | 
|  | 37 | #  define ZD_TYPE ssize_t | 
|  | 38 | #else | 
|  | 39 | #  define ZD "%ld" | 
|  | 40 | #  define ZD_TYPE long | 
|  | 41 | #endif | 
|  | 42 |  | 
|  | 43 | /* | 
|  | 44 | * We must open binary files using open(path, ... | O_BINARY) under Windows. | 
|  | 45 | * Otherwise strange read errors will happen. | 
|  | 46 | */ | 
|  | 47 | #ifndef O_BINARY | 
|  | 48 | #  define O_BINARY  0 | 
|  | 49 | #endif | 
|  | 50 |  | 
|  | 51 | /* | 
|  | 52 | * TEMP_FAILURE_RETRY is defined by some, but not all, versions of | 
|  | 53 | * <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's | 
|  | 54 | * not already defined, then define it here. | 
|  | 55 | */ | 
|  | 56 | #ifndef TEMP_FAILURE_RETRY | 
|  | 57 | /* Used to retry syscalls that can return EINTR. */ | 
|  | 58 | #define TEMP_FAILURE_RETRY(exp) ({         \ | 
|  | 59 | typeof (exp) _rc;                      \ | 
|  | 60 | do {                                   \ | 
|  | 61 | _rc = (exp);                       \ | 
|  | 62 | } while (_rc == -1 && errno == EINTR); \ | 
|  | 63 | _rc; }) | 
|  | 64 | #endif | 
|  | 65 |  | 
|  | 66 | using namespace android; | 
|  | 67 |  | 
|  | 68 | /* | 
|  | 69 | * Zip file constants. | 
|  | 70 | */ | 
|  | 71 | #define kEOCDSignature      0x06054b50 | 
|  | 72 | #define kEOCDLen            22 | 
|  | 73 | #define kEOCDNumEntries     8               // offset to #of entries in file | 
|  | 74 | #define kEOCDSize           12              // size of the central directory | 
|  | 75 | #define kEOCDFileOffset     16              // offset to central directory | 
|  | 76 |  | 
|  | 77 | #define kMaxCommentLen      65535           // longest possible in ushort | 
|  | 78 | #define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen) | 
|  | 79 |  | 
|  | 80 | #define kLFHSignature       0x04034b50 | 
|  | 81 | #define kLFHLen             30              // excluding variable-len fields | 
|  | 82 | #define kLFHNameLen         26              // offset to filename length | 
|  | 83 | #define kLFHExtraLen        28              // offset to extra length | 
|  | 84 |  | 
|  | 85 | #define kCDESignature       0x02014b50 | 
|  | 86 | #define kCDELen             46              // excluding variable-len fields | 
|  | 87 | #define kCDEMethod          10              // offset to compression method | 
|  | 88 | #define kCDEModWhen         12              // offset to modification timestamp | 
|  | 89 | #define kCDECRC             16              // offset to entry CRC | 
|  | 90 | #define kCDECompLen         20              // offset to compressed length | 
|  | 91 | #define kCDEUncompLen       24              // offset to uncompressed length | 
|  | 92 | #define kCDENameLen         28              // offset to filename length | 
|  | 93 | #define kCDEExtraLen        30              // offset to extra length | 
|  | 94 | #define kCDECommentLen      32              // offset to comment length | 
|  | 95 | #define kCDELocalOffset     42              // offset to local hdr | 
|  | 96 |  | 
|  | 97 | /* | 
|  | 98 | * The values we return for ZipEntryRO use 0 as an invalid value, so we | 
|  | 99 | * want to adjust the hash table index by a fixed amount.  Using a large | 
|  | 100 | * value helps insure that people don't mix & match arguments, e.g. to | 
|  | 101 | * findEntryByIndex(). | 
|  | 102 | */ | 
|  | 103 | #define kZipEntryAdj        10000 | 
|  | 104 |  | 
|  | 105 | ZipFileRO::~ZipFileRO() { | 
|  | 106 | free(mHashTable); | 
|  | 107 | if (mDirectoryMap) | 
|  | 108 | mDirectoryMap->release(); | 
|  | 109 | if (mFd >= 0) | 
|  | 110 | TEMP_FAILURE_RETRY(close(mFd)); | 
|  | 111 | if (mFileName) | 
|  | 112 | free(mFileName); | 
|  | 113 | } | 
|  | 114 |  | 
|  | 115 | /* | 
|  | 116 | * Convert a ZipEntryRO to a hash table index, verifying that it's in a | 
|  | 117 | * valid range. | 
|  | 118 | */ | 
|  | 119 | int ZipFileRO::entryToIndex(const ZipEntryRO entry) const | 
|  | 120 | { | 
|  | 121 | long ent = ((long) entry) - kZipEntryAdj; | 
|  | 122 | if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) { | 
|  | 123 | ALOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent); | 
|  | 124 | return -1; | 
|  | 125 | } | 
|  | 126 | return ent; | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 |  | 
|  | 130 | /* | 
|  | 131 | * Open the specified file read-only.  We memory-map the entire thing and | 
|  | 132 | * close the file before returning. | 
|  | 133 | */ | 
|  | 134 | status_t ZipFileRO::open(const char* zipFileName) | 
|  | 135 | { | 
|  | 136 | int fd = -1; | 
|  | 137 |  | 
|  | 138 | assert(mDirectoryMap == NULL); | 
|  | 139 |  | 
|  | 140 | /* | 
|  | 141 | * Open and map the specified file. | 
|  | 142 | */ | 
|  | 143 | fd = ::open(zipFileName, O_RDONLY | O_BINARY); | 
|  | 144 | if (fd < 0) { | 
|  | 145 | ALOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno)); | 
|  | 146 | return NAME_NOT_FOUND; | 
|  | 147 | } | 
|  | 148 |  | 
|  | 149 | mFileLength = lseek64(fd, 0, SEEK_END); | 
|  | 150 | if (mFileLength < kEOCDLen) { | 
|  | 151 | TEMP_FAILURE_RETRY(close(fd)); | 
|  | 152 | return UNKNOWN_ERROR; | 
|  | 153 | } | 
|  | 154 |  | 
|  | 155 | if (mFileName != NULL) { | 
|  | 156 | free(mFileName); | 
|  | 157 | } | 
|  | 158 | mFileName = strdup(zipFileName); | 
|  | 159 |  | 
|  | 160 | mFd = fd; | 
|  | 161 |  | 
|  | 162 | /* | 
|  | 163 | * Find the Central Directory and store its size and number of entries. | 
|  | 164 | */ | 
|  | 165 | if (!mapCentralDirectory()) { | 
|  | 166 | goto bail; | 
|  | 167 | } | 
|  | 168 |  | 
|  | 169 | /* | 
|  | 170 | * Verify Central Directory and create data structures for fast access. | 
|  | 171 | */ | 
|  | 172 | if (!parseZipArchive()) { | 
|  | 173 | goto bail; | 
|  | 174 | } | 
|  | 175 |  | 
|  | 176 | return OK; | 
|  | 177 |  | 
|  | 178 | bail: | 
|  | 179 | free(mFileName); | 
|  | 180 | mFileName = NULL; | 
|  | 181 | TEMP_FAILURE_RETRY(close(fd)); | 
|  | 182 | return UNKNOWN_ERROR; | 
|  | 183 | } | 
|  | 184 |  | 
|  | 185 | /* | 
|  | 186 | * Parse the Zip archive, verifying its contents and initializing internal | 
|  | 187 | * data structures. | 
|  | 188 | */ | 
|  | 189 | bool ZipFileRO::mapCentralDirectory(void) | 
|  | 190 | { | 
|  | 191 | ssize_t readAmount = kMaxEOCDSearch; | 
|  | 192 | if (readAmount > (ssize_t) mFileLength) | 
|  | 193 | readAmount = mFileLength; | 
|  | 194 |  | 
|  | 195 | unsigned char* scanBuf = (unsigned char*) malloc(readAmount); | 
|  | 196 | if (scanBuf == NULL) { | 
|  | 197 | ALOGW("couldn't allocate scanBuf: %s", strerror(errno)); | 
|  | 198 | free(scanBuf); | 
|  | 199 | return false; | 
|  | 200 | } | 
|  | 201 |  | 
|  | 202 | /* | 
|  | 203 | * Make sure this is a Zip archive. | 
|  | 204 | */ | 
|  | 205 | if (lseek64(mFd, 0, SEEK_SET) != 0) { | 
|  | 206 | ALOGW("seek to start failed: %s", strerror(errno)); | 
|  | 207 | free(scanBuf); | 
|  | 208 | return false; | 
|  | 209 | } | 
|  | 210 |  | 
|  | 211 | ssize_t actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, sizeof(int32_t))); | 
|  | 212 | if (actual != (ssize_t) sizeof(int32_t)) { | 
|  | 213 | ALOGI("couldn't read first signature from zip archive: %s", strerror(errno)); | 
|  | 214 | free(scanBuf); | 
|  | 215 | return false; | 
|  | 216 | } | 
|  | 217 |  | 
|  | 218 | { | 
|  | 219 | unsigned int header = get4LE(scanBuf); | 
|  | 220 | if (header == kEOCDSignature) { | 
|  | 221 | ALOGI("Found Zip archive, but it looks empty\n"); | 
|  | 222 | free(scanBuf); | 
|  | 223 | return false; | 
|  | 224 | } else if (header != kLFHSignature) { | 
|  | 225 | ALOGV("Not a Zip archive (found 0x%08x)\n", header); | 
|  | 226 | free(scanBuf); | 
|  | 227 | return false; | 
|  | 228 | } | 
|  | 229 | } | 
|  | 230 |  | 
|  | 231 | /* | 
|  | 232 | * Perform the traditional EOCD snipe hunt. | 
|  | 233 | * | 
|  | 234 | * We're searching for the End of Central Directory magic number, | 
|  | 235 | * which appears at the start of the EOCD block.  It's followed by | 
|  | 236 | * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We | 
|  | 237 | * need to read the last part of the file into a buffer, dig through | 
|  | 238 | * it to find the magic number, parse some values out, and use those | 
|  | 239 | * to determine the extent of the CD. | 
|  | 240 | * | 
|  | 241 | * We start by pulling in the last part of the file. | 
|  | 242 | */ | 
|  | 243 | off64_t searchStart = mFileLength - readAmount; | 
|  | 244 |  | 
|  | 245 | if (lseek64(mFd, searchStart, SEEK_SET) != searchStart) { | 
|  | 246 | ALOGW("seek %ld failed: %s\n",  (long) searchStart, strerror(errno)); | 
|  | 247 | free(scanBuf); | 
|  | 248 | return false; | 
|  | 249 | } | 
|  | 250 | actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, readAmount)); | 
|  | 251 | if (actual != (ssize_t) readAmount) { | 
|  | 252 | ALOGW("Zip: read " ZD ", expected " ZD ". Failed: %s\n", | 
|  | 253 | (ZD_TYPE) actual, (ZD_TYPE) readAmount, strerror(errno)); | 
|  | 254 | free(scanBuf); | 
|  | 255 | return false; | 
|  | 256 | } | 
|  | 257 |  | 
|  | 258 | /* | 
|  | 259 | * Scan backward for the EOCD magic.  In an archive without a trailing | 
|  | 260 | * comment, we'll find it on the first try.  (We may want to consider | 
|  | 261 | * doing an initial minimal read; if we don't find it, retry with a | 
|  | 262 | * second read as above.) | 
|  | 263 | */ | 
|  | 264 | int i; | 
|  | 265 | for (i = readAmount - kEOCDLen; i >= 0; i--) { | 
|  | 266 | if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) { | 
|  | 267 | ALOGV("+++ Found EOCD at buf+%d\n", i); | 
|  | 268 | break; | 
|  | 269 | } | 
|  | 270 | } | 
|  | 271 | if (i < 0) { | 
|  | 272 | ALOGD("Zip: EOCD not found, %s is not zip\n", mFileName); | 
|  | 273 | free(scanBuf); | 
|  | 274 | return false; | 
|  | 275 | } | 
|  | 276 |  | 
|  | 277 | off64_t eocdOffset = searchStart + i; | 
|  | 278 | const unsigned char* eocdPtr = scanBuf + i; | 
|  | 279 |  | 
|  | 280 | assert(eocdOffset < mFileLength); | 
|  | 281 |  | 
|  | 282 | /* | 
|  | 283 | * Grab the CD offset and size, and the number of entries in the | 
|  | 284 | * archive. After that, we can release our EOCD hunt buffer. | 
|  | 285 | */ | 
|  | 286 | unsigned int numEntries = get2LE(eocdPtr + kEOCDNumEntries); | 
|  | 287 | unsigned int dirSize = get4LE(eocdPtr + kEOCDSize); | 
|  | 288 | unsigned int dirOffset = get4LE(eocdPtr + kEOCDFileOffset); | 
|  | 289 | free(scanBuf); | 
|  | 290 |  | 
|  | 291 | // Verify that they look reasonable. | 
|  | 292 | if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) { | 
|  | 293 | ALOGW("bad offsets (dir %ld, size %u, eocd %ld)\n", | 
|  | 294 | (long) dirOffset, dirSize, (long) eocdOffset); | 
|  | 295 | return false; | 
|  | 296 | } | 
|  | 297 | if (numEntries == 0) { | 
|  | 298 | ALOGW("empty archive?\n"); | 
|  | 299 | return false; | 
|  | 300 | } | 
|  | 301 |  | 
|  | 302 | ALOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n", | 
|  | 303 | numEntries, dirSize, dirOffset); | 
|  | 304 |  | 
|  | 305 | mDirectoryMap = new FileMap(); | 
|  | 306 | if (mDirectoryMap == NULL) { | 
|  | 307 | ALOGW("Unable to create directory map: %s", strerror(errno)); | 
|  | 308 | return false; | 
|  | 309 | } | 
|  | 310 |  | 
|  | 311 | if (!mDirectoryMap->create(mFileName, mFd, dirOffset, dirSize, true)) { | 
|  | 312 | ALOGW("Unable to map '%s' (" ZD " to " ZD "): %s\n", mFileName, | 
|  | 313 | (ZD_TYPE) dirOffset, (ZD_TYPE) (dirOffset + dirSize), strerror(errno)); | 
|  | 314 | return false; | 
|  | 315 | } | 
|  | 316 |  | 
|  | 317 | mNumEntries = numEntries; | 
|  | 318 | mDirectoryOffset = dirOffset; | 
|  | 319 |  | 
|  | 320 | return true; | 
|  | 321 | } | 
|  | 322 |  | 
|  | 323 | bool ZipFileRO::parseZipArchive(void) | 
|  | 324 | { | 
|  | 325 | bool result = false; | 
|  | 326 | const unsigned char* cdPtr = (const unsigned char*) mDirectoryMap->getDataPtr(); | 
|  | 327 | size_t cdLength = mDirectoryMap->getDataLength(); | 
|  | 328 | int numEntries = mNumEntries; | 
|  | 329 |  | 
|  | 330 | /* | 
|  | 331 | * Create hash table.  We have a minimum 75% load factor, possibly as | 
|  | 332 | * low as 50% after we round off to a power of 2. | 
|  | 333 | */ | 
|  | 334 | mHashTableSize = roundUpPower2(1 + (numEntries * 4) / 3); | 
|  | 335 | mHashTable = (HashEntry*) calloc(mHashTableSize, sizeof(HashEntry)); | 
|  | 336 |  | 
|  | 337 | /* | 
|  | 338 | * Walk through the central directory, adding entries to the hash | 
|  | 339 | * table. | 
|  | 340 | */ | 
|  | 341 | const unsigned char* ptr = cdPtr; | 
|  | 342 | for (int i = 0; i < numEntries; i++) { | 
|  | 343 | if (get4LE(ptr) != kCDESignature) { | 
|  | 344 | ALOGW("Missed a central dir sig (at %d)\n", i); | 
|  | 345 | goto bail; | 
|  | 346 | } | 
|  | 347 | if (ptr + kCDELen > cdPtr + cdLength) { | 
|  | 348 | ALOGW("Ran off the end (at %d)\n", i); | 
|  | 349 | goto bail; | 
|  | 350 | } | 
|  | 351 |  | 
|  | 352 | long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset); | 
|  | 353 | if (localHdrOffset >= mDirectoryOffset) { | 
|  | 354 | ALOGW("bad LFH offset %ld at entry %d\n", localHdrOffset, i); | 
|  | 355 | goto bail; | 
|  | 356 | } | 
|  | 357 |  | 
|  | 358 | unsigned int fileNameLen, extraLen, commentLen, hash; | 
|  | 359 |  | 
|  | 360 | fileNameLen = get2LE(ptr + kCDENameLen); | 
|  | 361 | extraLen = get2LE(ptr + kCDEExtraLen); | 
|  | 362 | commentLen = get2LE(ptr + kCDECommentLen); | 
|  | 363 |  | 
|  | 364 | /* add the CDE filename to the hash table */ | 
|  | 365 | hash = computeHash((const char*)ptr + kCDELen, fileNameLen); | 
|  | 366 | addToHash((const char*)ptr + kCDELen, fileNameLen, hash); | 
|  | 367 |  | 
|  | 368 | ptr += kCDELen + fileNameLen + extraLen + commentLen; | 
|  | 369 | if ((size_t)(ptr - cdPtr) > cdLength) { | 
|  | 370 | ALOGW("bad CD advance (%d vs " ZD ") at entry %d\n", | 
|  | 371 | (int) (ptr - cdPtr), (ZD_TYPE) cdLength, i); | 
|  | 372 | goto bail; | 
|  | 373 | } | 
|  | 374 | } | 
|  | 375 | ALOGV("+++ zip good scan %d entries\n", numEntries); | 
|  | 376 | result = true; | 
|  | 377 |  | 
|  | 378 | bail: | 
|  | 379 | return result; | 
|  | 380 | } | 
|  | 381 |  | 
|  | 382 | /* | 
|  | 383 | * Simple string hash function for non-null-terminated strings. | 
|  | 384 | */ | 
|  | 385 | /*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len) | 
|  | 386 | { | 
|  | 387 | unsigned int hash = 0; | 
|  | 388 |  | 
|  | 389 | while (len--) | 
|  | 390 | hash = hash * 31 + *str++; | 
|  | 391 |  | 
|  | 392 | return hash; | 
|  | 393 | } | 
|  | 394 |  | 
|  | 395 | /* | 
|  | 396 | * Add a new entry to the hash table. | 
|  | 397 | */ | 
|  | 398 | void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash) | 
|  | 399 | { | 
|  | 400 | int ent = hash & (mHashTableSize-1); | 
|  | 401 |  | 
|  | 402 | /* | 
|  | 403 | * We over-allocate the table, so we're guaranteed to find an empty slot. | 
|  | 404 | */ | 
|  | 405 | while (mHashTable[ent].name != NULL) | 
|  | 406 | ent = (ent + 1) & (mHashTableSize-1); | 
|  | 407 |  | 
|  | 408 | mHashTable[ent].name = str; | 
|  | 409 | mHashTable[ent].nameLen = strLen; | 
|  | 410 | } | 
|  | 411 |  | 
|  | 412 | /* | 
|  | 413 | * Find a matching entry. | 
|  | 414 | * | 
|  | 415 | * Returns NULL if not found. | 
|  | 416 | */ | 
|  | 417 | ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const | 
|  | 418 | { | 
|  | 419 | /* | 
|  | 420 | * If the ZipFileRO instance is not initialized, the entry number will | 
|  | 421 | * end up being garbage since mHashTableSize is -1. | 
|  | 422 | */ | 
|  | 423 | if (mHashTableSize <= 0) { | 
|  | 424 | return NULL; | 
|  | 425 | } | 
|  | 426 |  | 
|  | 427 | int nameLen = strlen(fileName); | 
|  | 428 | unsigned int hash = computeHash(fileName, nameLen); | 
|  | 429 | int ent = hash & (mHashTableSize-1); | 
|  | 430 |  | 
|  | 431 | while (mHashTable[ent].name != NULL) { | 
|  | 432 | if (mHashTable[ent].nameLen == nameLen && | 
|  | 433 | memcmp(mHashTable[ent].name, fileName, nameLen) == 0) | 
|  | 434 | { | 
|  | 435 | /* match */ | 
|  | 436 | return (ZipEntryRO)(long)(ent + kZipEntryAdj); | 
|  | 437 | } | 
|  | 438 |  | 
|  | 439 | ent = (ent + 1) & (mHashTableSize-1); | 
|  | 440 | } | 
|  | 441 |  | 
|  | 442 | return NULL; | 
|  | 443 | } | 
|  | 444 |  | 
|  | 445 | /* | 
|  | 446 | * Find the Nth entry. | 
|  | 447 | * | 
|  | 448 | * This currently involves walking through the sparse hash table, counting | 
|  | 449 | * non-empty entries.  If we need to speed this up we can either allocate | 
|  | 450 | * a parallel lookup table or (perhaps better) provide an iterator interface. | 
|  | 451 | */ | 
|  | 452 | ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const | 
|  | 453 | { | 
|  | 454 | if (idx < 0 || idx >= mNumEntries) { | 
|  | 455 | ALOGW("Invalid index %d\n", idx); | 
|  | 456 | return NULL; | 
|  | 457 | } | 
|  | 458 |  | 
|  | 459 | for (int ent = 0; ent < mHashTableSize; ent++) { | 
|  | 460 | if (mHashTable[ent].name != NULL) { | 
|  | 461 | if (idx-- == 0) | 
|  | 462 | return (ZipEntryRO) (ent + kZipEntryAdj); | 
|  | 463 | } | 
|  | 464 | } | 
|  | 465 |  | 
|  | 466 | return NULL; | 
|  | 467 | } | 
|  | 468 |  | 
|  | 469 | /* | 
|  | 470 | * Get the useful fields from the zip entry. | 
|  | 471 | * | 
|  | 472 | * Returns "false" if the offsets to the fields or the contents of the fields | 
|  | 473 | * appear to be bogus. | 
|  | 474 | */ | 
|  | 475 | bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen, | 
|  | 476 | size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const | 
|  | 477 | { | 
|  | 478 | bool ret = false; | 
|  | 479 |  | 
|  | 480 | const int ent = entryToIndex(entry); | 
|  | 481 | if (ent < 0) | 
|  | 482 | return false; | 
|  | 483 |  | 
|  | 484 | HashEntry hashEntry = mHashTable[ent]; | 
|  | 485 |  | 
|  | 486 | /* | 
|  | 487 | * Recover the start of the central directory entry from the filename | 
|  | 488 | * pointer.  The filename is the first entry past the fixed-size data, | 
|  | 489 | * so we can just subtract back from that. | 
|  | 490 | */ | 
|  | 491 | const unsigned char* ptr = (const unsigned char*) hashEntry.name; | 
|  | 492 | off64_t cdOffset = mDirectoryOffset; | 
|  | 493 |  | 
|  | 494 | ptr -= kCDELen; | 
|  | 495 |  | 
|  | 496 | int method = get2LE(ptr + kCDEMethod); | 
|  | 497 | if (pMethod != NULL) | 
|  | 498 | *pMethod = method; | 
|  | 499 |  | 
|  | 500 | if (pModWhen != NULL) | 
|  | 501 | *pModWhen = get4LE(ptr + kCDEModWhen); | 
|  | 502 | if (pCrc32 != NULL) | 
|  | 503 | *pCrc32 = get4LE(ptr + kCDECRC); | 
|  | 504 |  | 
|  | 505 | size_t compLen = get4LE(ptr + kCDECompLen); | 
|  | 506 | if (pCompLen != NULL) | 
|  | 507 | *pCompLen = compLen; | 
|  | 508 | size_t uncompLen = get4LE(ptr + kCDEUncompLen); | 
|  | 509 | if (pUncompLen != NULL) | 
|  | 510 | *pUncompLen = uncompLen; | 
|  | 511 |  | 
|  | 512 | /* | 
|  | 513 | * If requested, determine the offset of the start of the data.  All we | 
|  | 514 | * have is the offset to the Local File Header, which is variable size, | 
|  | 515 | * so we have to read the contents of the struct to figure out where | 
|  | 516 | * the actual data starts. | 
|  | 517 | * | 
|  | 518 | * We also need to make sure that the lengths are not so large that | 
|  | 519 | * somebody trying to map the compressed or uncompressed data runs | 
|  | 520 | * off the end of the mapped region. | 
|  | 521 | * | 
|  | 522 | * Note we don't verify compLen/uncompLen if they don't request the | 
|  | 523 | * dataOffset, because dataOffset is expensive to determine.  However, | 
|  | 524 | * if they don't have the file offset, they're not likely to be doing | 
|  | 525 | * anything with the contents. | 
|  | 526 | */ | 
|  | 527 | if (pOffset != NULL) { | 
|  | 528 | long localHdrOffset = get4LE(ptr + kCDELocalOffset); | 
|  | 529 | if (localHdrOffset + kLFHLen >= cdOffset) { | 
|  | 530 | ALOGE("ERROR: bad local hdr offset in zip\n"); | 
|  | 531 | return false; | 
|  | 532 | } | 
|  | 533 |  | 
|  | 534 | unsigned char lfhBuf[kLFHLen]; | 
|  | 535 |  | 
|  | 536 | #ifdef HAVE_PREAD | 
|  | 537 | /* | 
|  | 538 | * This file descriptor might be from zygote's preloaded assets, | 
|  | 539 | * so we need to do an pread64() instead of a lseek64() + read() to | 
|  | 540 | * guarantee atomicity across the processes with the shared file | 
|  | 541 | * descriptors. | 
|  | 542 | */ | 
|  | 543 | ssize_t actual = | 
|  | 544 | TEMP_FAILURE_RETRY(pread64(mFd, lfhBuf, sizeof(lfhBuf), localHdrOffset)); | 
|  | 545 |  | 
|  | 546 | if (actual != sizeof(lfhBuf)) { | 
|  | 547 | ALOGW("failed reading lfh from offset %ld\n", localHdrOffset); | 
|  | 548 | return false; | 
|  | 549 | } | 
|  | 550 |  | 
|  | 551 | if (get4LE(lfhBuf) != kLFHSignature) { | 
|  | 552 | ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; " | 
|  | 553 | "got: data=0x%08lx\n", | 
|  | 554 | localHdrOffset, kLFHSignature, get4LE(lfhBuf)); | 
|  | 555 | return false; | 
|  | 556 | } | 
|  | 557 | #else /* HAVE_PREAD */ | 
|  | 558 | /* | 
|  | 559 | * For hosts don't have pread64() we cannot guarantee atomic reads from | 
|  | 560 | * an offset in a file. Android should never run on those platforms. | 
|  | 561 | * File descriptors inherited from a fork() share file offsets and | 
|  | 562 | * there would be nothing to protect from two different processes | 
|  | 563 | * calling lseek64() concurrently. | 
|  | 564 | */ | 
|  | 565 |  | 
|  | 566 | { | 
|  | 567 | AutoMutex _l(mFdLock); | 
|  | 568 |  | 
|  | 569 | if (lseek64(mFd, localHdrOffset, SEEK_SET) != localHdrOffset) { | 
|  | 570 | ALOGW("failed seeking to lfh at offset %ld\n", localHdrOffset); | 
|  | 571 | return false; | 
|  | 572 | } | 
|  | 573 |  | 
|  | 574 | ssize_t actual = | 
|  | 575 | TEMP_FAILURE_RETRY(read(mFd, lfhBuf, sizeof(lfhBuf))); | 
|  | 576 | if (actual != sizeof(lfhBuf)) { | 
|  | 577 | ALOGW("failed reading lfh from offset %ld\n", localHdrOffset); | 
|  | 578 | return false; | 
|  | 579 | } | 
|  | 580 |  | 
|  | 581 | if (get4LE(lfhBuf) != kLFHSignature) { | 
|  | 582 | off64_t actualOffset = lseek64(mFd, 0, SEEK_CUR); | 
|  | 583 | ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; " | 
|  | 584 | "got: offset=" ZD " data=0x%08lx\n", | 
|  | 585 | localHdrOffset, kLFHSignature, (ZD_TYPE) actualOffset, get4LE(lfhBuf)); | 
|  | 586 | return false; | 
|  | 587 | } | 
|  | 588 | } | 
|  | 589 | #endif /* HAVE_PREAD */ | 
|  | 590 |  | 
|  | 591 | off64_t dataOffset = localHdrOffset + kLFHLen | 
|  | 592 | + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen); | 
|  | 593 | if (dataOffset >= cdOffset) { | 
|  | 594 | ALOGW("bad data offset %ld in zip\n", (long) dataOffset); | 
|  | 595 | return false; | 
|  | 596 | } | 
|  | 597 |  | 
|  | 598 | /* check lengths */ | 
|  | 599 | if ((off64_t)(dataOffset + compLen) > cdOffset) { | 
|  | 600 | ALOGW("bad compressed length in zip (%ld + " ZD " > %ld)\n", | 
|  | 601 | (long) dataOffset, (ZD_TYPE) compLen, (long) cdOffset); | 
|  | 602 | return false; | 
|  | 603 | } | 
|  | 604 |  | 
|  | 605 | if (method == kCompressStored && | 
|  | 606 | (off64_t)(dataOffset + uncompLen) > cdOffset) | 
|  | 607 | { | 
|  | 608 | ALOGE("ERROR: bad uncompressed length in zip (%ld + " ZD " > %ld)\n", | 
|  | 609 | (long) dataOffset, (ZD_TYPE) uncompLen, (long) cdOffset); | 
|  | 610 | return false; | 
|  | 611 | } | 
|  | 612 |  | 
|  | 613 | *pOffset = dataOffset; | 
|  | 614 | } | 
|  | 615 |  | 
|  | 616 | return true; | 
|  | 617 | } | 
|  | 618 |  | 
|  | 619 | /* | 
|  | 620 | * Copy the entry's filename to the buffer. | 
|  | 621 | */ | 
|  | 622 | int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) | 
|  | 623 | const | 
|  | 624 | { | 
|  | 625 | int ent = entryToIndex(entry); | 
|  | 626 | if (ent < 0) | 
|  | 627 | return -1; | 
|  | 628 |  | 
|  | 629 | int nameLen = mHashTable[ent].nameLen; | 
|  | 630 | if (bufLen < nameLen+1) | 
|  | 631 | return nameLen+1; | 
|  | 632 |  | 
|  | 633 | memcpy(buffer, mHashTable[ent].name, nameLen); | 
|  | 634 | buffer[nameLen] = '\0'; | 
|  | 635 | return 0; | 
|  | 636 | } | 
|  | 637 |  | 
|  | 638 | /* | 
|  | 639 | * Create a new FileMap object that spans the data in "entry". | 
|  | 640 | */ | 
|  | 641 | FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const | 
|  | 642 | { | 
|  | 643 | /* | 
|  | 644 | * TODO: the efficient way to do this is to modify FileMap to allow | 
|  | 645 | * sub-regions of a file to be mapped.  A reference-counting scheme | 
|  | 646 | * can manage the base memory mapping.  For now, we just create a brand | 
|  | 647 | * new mapping off of the Zip archive file descriptor. | 
|  | 648 | */ | 
|  | 649 |  | 
|  | 650 | FileMap* newMap; | 
|  | 651 | size_t compLen; | 
|  | 652 | off64_t offset; | 
|  | 653 |  | 
|  | 654 | if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL)) | 
|  | 655 | return NULL; | 
|  | 656 |  | 
|  | 657 | newMap = new FileMap(); | 
|  | 658 | if (!newMap->create(mFileName, mFd, offset, compLen, true)) { | 
|  | 659 | newMap->release(); | 
|  | 660 | return NULL; | 
|  | 661 | } | 
|  | 662 |  | 
|  | 663 | return newMap; | 
|  | 664 | } | 
|  | 665 |  | 
|  | 666 | /* | 
|  | 667 | * Uncompress an entry, in its entirety, into the provided output buffer. | 
|  | 668 | * | 
|  | 669 | * This doesn't verify the data's CRC, which might be useful for | 
|  | 670 | * uncompressed data.  The caller should be able to manage it. | 
|  | 671 | */ | 
|  | 672 | bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const | 
|  | 673 | { | 
|  | 674 | const size_t kSequentialMin = 32768; | 
|  | 675 | bool result = false; | 
|  | 676 | int ent = entryToIndex(entry); | 
|  | 677 | if (ent < 0) | 
|  | 678 | return -1; | 
|  | 679 |  | 
|  | 680 | int method; | 
|  | 681 | size_t uncompLen, compLen; | 
|  | 682 | off64_t offset; | 
|  | 683 | const unsigned char* ptr; | 
|  | 684 |  | 
|  | 685 | getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); | 
|  | 686 |  | 
|  | 687 | FileMap* file = createEntryFileMap(entry); | 
|  | 688 | if (file == NULL) { | 
|  | 689 | goto bail; | 
|  | 690 | } | 
|  | 691 |  | 
|  | 692 | ptr = (const unsigned char*) file->getDataPtr(); | 
|  | 693 |  | 
|  | 694 | /* | 
|  | 695 | * Experiment with madvise hint.  When we want to uncompress a file, | 
|  | 696 | * we pull some stuff out of the central dir entry and then hit a | 
|  | 697 | * bunch of compressed or uncompressed data sequentially.  The CDE | 
|  | 698 | * visit will cause a limited amount of read-ahead because it's at | 
|  | 699 | * the end of the file.  We could end up doing lots of extra disk | 
|  | 700 | * access if the file we're prying open is small.  Bottom line is we | 
|  | 701 | * probably don't want to turn MADV_SEQUENTIAL on and leave it on. | 
|  | 702 | * | 
|  | 703 | * So, if the compressed size of the file is above a certain minimum | 
|  | 704 | * size, temporarily boost the read-ahead in the hope that the extra | 
|  | 705 | * pair of system calls are negated by a reduction in page faults. | 
|  | 706 | */ | 
|  | 707 | if (compLen > kSequentialMin) | 
|  | 708 | file->advise(FileMap::SEQUENTIAL); | 
|  | 709 |  | 
|  | 710 | if (method == kCompressStored) { | 
|  | 711 | memcpy(buffer, ptr, uncompLen); | 
|  | 712 | } else { | 
|  | 713 | if (!inflateBuffer(buffer, ptr, uncompLen, compLen)) | 
|  | 714 | goto unmap; | 
|  | 715 | } | 
|  | 716 |  | 
|  | 717 | if (compLen > kSequentialMin) | 
|  | 718 | file->advise(FileMap::NORMAL); | 
|  | 719 |  | 
|  | 720 | result = true; | 
|  | 721 |  | 
|  | 722 | unmap: | 
|  | 723 | file->release(); | 
|  | 724 | bail: | 
|  | 725 | return result; | 
|  | 726 | } | 
|  | 727 |  | 
|  | 728 | /* | 
|  | 729 | * Uncompress an entry, in its entirety, to an open file descriptor. | 
|  | 730 | * | 
|  | 731 | * This doesn't verify the data's CRC, but probably should. | 
|  | 732 | */ | 
|  | 733 | bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const | 
|  | 734 | { | 
|  | 735 | bool result = false; | 
|  | 736 | int ent = entryToIndex(entry); | 
|  | 737 | if (ent < 0) | 
|  | 738 | return -1; | 
|  | 739 |  | 
|  | 740 | int method; | 
|  | 741 | size_t uncompLen, compLen; | 
|  | 742 | off64_t offset; | 
|  | 743 | const unsigned char* ptr; | 
|  | 744 |  | 
|  | 745 | getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); | 
|  | 746 |  | 
|  | 747 | FileMap* file = createEntryFileMap(entry); | 
|  | 748 | if (file == NULL) { | 
|  | 749 | goto bail; | 
|  | 750 | } | 
|  | 751 |  | 
|  | 752 | ptr = (const unsigned char*) file->getDataPtr(); | 
|  | 753 |  | 
|  | 754 | if (method == kCompressStored) { | 
|  | 755 | ssize_t actual = write(fd, ptr, uncompLen); | 
|  | 756 | if (actual < 0) { | 
|  | 757 | ALOGE("Write failed: %s\n", strerror(errno)); | 
|  | 758 | goto unmap; | 
|  | 759 | } else if ((size_t) actual != uncompLen) { | 
|  | 760 | ALOGE("Partial write during uncompress (" ZD " of " ZD ")\n", | 
|  | 761 | (ZD_TYPE) actual, (ZD_TYPE) uncompLen); | 
|  | 762 | goto unmap; | 
|  | 763 | } else { | 
|  | 764 | ALOGI("+++ successful write\n"); | 
|  | 765 | } | 
|  | 766 | } else { | 
|  | 767 | if (!inflateBuffer(fd, ptr, uncompLen, compLen)) | 
|  | 768 | goto unmap; | 
|  | 769 | } | 
|  | 770 |  | 
|  | 771 | result = true; | 
|  | 772 |  | 
|  | 773 | unmap: | 
|  | 774 | file->release(); | 
|  | 775 | bail: | 
|  | 776 | return result; | 
|  | 777 | } | 
|  | 778 |  | 
|  | 779 | /* | 
|  | 780 | * Uncompress "deflate" data from one buffer to another. | 
|  | 781 | */ | 
|  | 782 | /*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf, | 
|  | 783 | size_t uncompLen, size_t compLen) | 
|  | 784 | { | 
|  | 785 | bool result = false; | 
|  | 786 | z_stream zstream; | 
|  | 787 | int zerr; | 
|  | 788 |  | 
|  | 789 | /* | 
|  | 790 | * Initialize the zlib stream struct. | 
|  | 791 | */ | 
|  | 792 | memset(&zstream, 0, sizeof(zstream)); | 
|  | 793 | zstream.zalloc = Z_NULL; | 
|  | 794 | zstream.zfree = Z_NULL; | 
|  | 795 | zstream.opaque = Z_NULL; | 
|  | 796 | zstream.next_in = (Bytef*)inBuf; | 
|  | 797 | zstream.avail_in = compLen; | 
|  | 798 | zstream.next_out = (Bytef*) outBuf; | 
|  | 799 | zstream.avail_out = uncompLen; | 
|  | 800 | zstream.data_type = Z_UNKNOWN; | 
|  | 801 |  | 
|  | 802 | /* | 
|  | 803 | * Use the undocumented "negative window bits" feature to tell zlib | 
|  | 804 | * that there's no zlib header waiting for it. | 
|  | 805 | */ | 
|  | 806 | zerr = inflateInit2(&zstream, -MAX_WBITS); | 
|  | 807 | if (zerr != Z_OK) { | 
|  | 808 | if (zerr == Z_VERSION_ERROR) { | 
|  | 809 | ALOGE("Installed zlib is not compatible with linked version (%s)\n", | 
|  | 810 | ZLIB_VERSION); | 
|  | 811 | } else { | 
|  | 812 | ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); | 
|  | 813 | } | 
|  | 814 | goto bail; | 
|  | 815 | } | 
|  | 816 |  | 
|  | 817 | /* | 
|  | 818 | * Expand data. | 
|  | 819 | */ | 
|  | 820 | zerr = inflate(&zstream, Z_FINISH); | 
|  | 821 | if (zerr != Z_STREAM_END) { | 
|  | 822 | ALOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", | 
|  | 823 | zerr, zstream.next_in, zstream.avail_in, | 
|  | 824 | zstream.next_out, zstream.avail_out); | 
|  | 825 | goto z_bail; | 
|  | 826 | } | 
|  | 827 |  | 
|  | 828 | /* paranoia */ | 
|  | 829 | if (zstream.total_out != uncompLen) { | 
|  | 830 | ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n", | 
|  | 831 | zstream.total_out, (ZD_TYPE) uncompLen); | 
|  | 832 | goto z_bail; | 
|  | 833 | } | 
|  | 834 |  | 
|  | 835 | result = true; | 
|  | 836 |  | 
|  | 837 | z_bail: | 
|  | 838 | inflateEnd(&zstream);        /* free up any allocated structures */ | 
|  | 839 |  | 
|  | 840 | bail: | 
|  | 841 | return result; | 
|  | 842 | } | 
|  | 843 |  | 
|  | 844 | /* | 
|  | 845 | * Uncompress "deflate" data from one buffer to an open file descriptor. | 
|  | 846 | */ | 
|  | 847 | /*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf, | 
|  | 848 | size_t uncompLen, size_t compLen) | 
|  | 849 | { | 
|  | 850 | bool result = false; | 
|  | 851 | const size_t kWriteBufSize = 32768; | 
|  | 852 | unsigned char writeBuf[kWriteBufSize]; | 
|  | 853 | z_stream zstream; | 
|  | 854 | int zerr; | 
|  | 855 |  | 
|  | 856 | /* | 
|  | 857 | * Initialize the zlib stream struct. | 
|  | 858 | */ | 
|  | 859 | memset(&zstream, 0, sizeof(zstream)); | 
|  | 860 | zstream.zalloc = Z_NULL; | 
|  | 861 | zstream.zfree = Z_NULL; | 
|  | 862 | zstream.opaque = Z_NULL; | 
|  | 863 | zstream.next_in = (Bytef*)inBuf; | 
|  | 864 | zstream.avail_in = compLen; | 
|  | 865 | zstream.next_out = (Bytef*) writeBuf; | 
|  | 866 | zstream.avail_out = sizeof(writeBuf); | 
|  | 867 | zstream.data_type = Z_UNKNOWN; | 
|  | 868 |  | 
|  | 869 | /* | 
|  | 870 | * Use the undocumented "negative window bits" feature to tell zlib | 
|  | 871 | * that there's no zlib header waiting for it. | 
|  | 872 | */ | 
|  | 873 | zerr = inflateInit2(&zstream, -MAX_WBITS); | 
|  | 874 | if (zerr != Z_OK) { | 
|  | 875 | if (zerr == Z_VERSION_ERROR) { | 
|  | 876 | ALOGE("Installed zlib is not compatible with linked version (%s)\n", | 
|  | 877 | ZLIB_VERSION); | 
|  | 878 | } else { | 
|  | 879 | ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); | 
|  | 880 | } | 
|  | 881 | goto bail; | 
|  | 882 | } | 
|  | 883 |  | 
|  | 884 | /* | 
|  | 885 | * Loop while we have more to do. | 
|  | 886 | */ | 
|  | 887 | do { | 
|  | 888 | /* | 
|  | 889 | * Expand data. | 
|  | 890 | */ | 
|  | 891 | zerr = inflate(&zstream, Z_NO_FLUSH); | 
|  | 892 | if (zerr != Z_OK && zerr != Z_STREAM_END) { | 
|  | 893 | ALOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", | 
|  | 894 | zerr, zstream.next_in, zstream.avail_in, | 
|  | 895 | zstream.next_out, zstream.avail_out); | 
|  | 896 | goto z_bail; | 
|  | 897 | } | 
|  | 898 |  | 
|  | 899 | /* write when we're full or when we're done */ | 
|  | 900 | if (zstream.avail_out == 0 || | 
|  | 901 | (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) | 
|  | 902 | { | 
|  | 903 | long writeSize = zstream.next_out - writeBuf; | 
|  | 904 | int cc = write(fd, writeBuf, writeSize); | 
|  | 905 | if (cc != (int) writeSize) { | 
|  | 906 | ALOGW("write failed in inflate (%d vs %ld)\n", cc, writeSize); | 
|  | 907 | goto z_bail; | 
|  | 908 | } | 
|  | 909 |  | 
|  | 910 | zstream.next_out = writeBuf; | 
|  | 911 | zstream.avail_out = sizeof(writeBuf); | 
|  | 912 | } | 
|  | 913 | } while (zerr == Z_OK); | 
|  | 914 |  | 
|  | 915 | assert(zerr == Z_STREAM_END);       /* other errors should've been caught */ | 
|  | 916 |  | 
|  | 917 | /* paranoia */ | 
|  | 918 | if (zstream.total_out != uncompLen) { | 
|  | 919 | ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n", | 
|  | 920 | zstream.total_out, (ZD_TYPE) uncompLen); | 
|  | 921 | goto z_bail; | 
|  | 922 | } | 
|  | 923 |  | 
|  | 924 | result = true; | 
|  | 925 |  | 
|  | 926 | z_bail: | 
|  | 927 | inflateEnd(&zstream);        /* free up any allocated structures */ | 
|  | 928 |  | 
|  | 929 | bail: | 
|  | 930 | return result; | 
|  | 931 | } |