Add Zopfli-recompress option to zipalign

Zopfli provides compression roughly 5% better than zlib, while remaining
completely compatible with zlib decoders. This patch adds a "-z" option
to zipalign, recompressing all compressed files within the zip archive.

Change-Id: If177ca4b82ec701b7446861b2cfe08c6bd403813
diff --git a/tools/zipalign/Android.mk b/tools/zipalign/Android.mk
index 708c8bf..7986798 100644
--- a/tools/zipalign/Android.mk
+++ b/tools/zipalign/Android.mk
@@ -12,13 +12,15 @@
 	ZipEntry.cpp \
 	ZipFile.cpp
 
-LOCAL_C_INCLUDES += external/zlib
+LOCAL_C_INCLUDES += external/zlib \
+	external/zopfli/src
 
 LOCAL_STATIC_LIBRARIES := \
 	libandroidfw \
 	libutils \
 	libcutils \
-	liblog
+	liblog \
+	libzopfli
 
 ifeq ($(HOST_OS),linux)
 LOCAL_LDLIBS += -lrt
diff --git a/tools/zipalign/ZipAlign.cpp b/tools/zipalign/ZipAlign.cpp
index 8b2d1af..dc2826b 100644
--- a/tools/zipalign/ZipAlign.cpp
+++ b/tools/zipalign/ZipAlign.cpp
@@ -32,19 +32,20 @@
     fprintf(stderr, "Zip alignment utility\n");
     fprintf(stderr, "Copyright (C) 2009 The Android Open Source Project\n\n");
     fprintf(stderr,
-        "Usage: zipalign [-f] [-v] <align> infile.zip outfile.zip\n"
+        "Usage: zipalign [-f] [-v] [-z] <align> infile.zip outfile.zip\n"
         "       zipalign -c [-v] <align> infile.zip\n\n" );
     fprintf(stderr,
         "  <align>: alignment in bytes, e.g. '4' provides 32-bit alignment\n");
     fprintf(stderr, "  -c: check alignment only (does not modify file)\n");
     fprintf(stderr, "  -f: overwrite existing outfile.zip\n");
     fprintf(stderr, "  -v: verbose output\n");
+    fprintf(stderr, "  -z: recompress using Zopfli\n");
 }
 
 /*
  * Copy all entries from "pZin" to "pZout", aligning as needed.
  */
-static int copyAndAlign(ZipFile* pZin, ZipFile* pZout, int alignment)
+static int copyAndAlign(ZipFile* pZin, ZipFile* pZout, int alignment, bool zopfli)
 {
     int numEntries = pZin->getNumEntries();
     ZipEntry* pEntry;
@@ -67,6 +68,12 @@
             //    pEntry->getFileName(), (long) pEntry->getFileOffset(),
             //    (long) pEntry->getUncompressedLen());
 
+            if (zopfli) {
+                status = pZout->addRecompress(pZin, pEntry, &pNewEntry);
+                bias += pNewEntry->getCompressedLen() - pEntry->getCompressedLen();
+            } else {
+                status = pZout->add(pZin, pEntry, padding, &pNewEntry);
+            }
         } else {
             /*
              * Copy the entry, adjusting as required.  We assume that the
@@ -79,9 +86,9 @@
             //printf("--- %s: orig at %ld(+%d) len=%ld, adding pad=%d\n",
             //    pEntry->getFileName(), (long) pEntry->getFileOffset(),
             //    bias, (long) pEntry->getUncompressedLen(), padding);
+            status = pZout->add(pZin, pEntry, padding, &pNewEntry);
         }
 
-        status = pZout->add(pZin, pEntry, padding, &pNewEntry);
         if (status != NO_ERROR)
             return 1;
         bias += padding;
@@ -98,7 +105,7 @@
  * output file exists and "force" wasn't specified.
  */
 static int process(const char* inFileName, const char* outFileName,
-    int alignment, bool force)
+    int alignment, bool force, bool zopfli)
 {
     ZipFile zin, zout;
 
@@ -129,7 +136,7 @@
         return 1;
     }
 
-    int result = copyAndAlign(&zin, &zout, alignment);
+    int result = copyAndAlign(&zin, &zout, alignment, zopfli);
     if (result != 0) {
         printf("zipalign: failed rewriting '%s' to '%s'\n",
             inFileName, outFileName);
@@ -196,6 +203,7 @@
     bool check = false;
     bool force = false;
     bool verbose = false;
+    bool zopfli = false;
     int result = 1;
     int alignment;
     char* endp;
@@ -222,6 +230,9 @@
             case 'v':
                 verbose = true;
                 break;
+            case 'z':
+                zopfli = true;
+                break;
             default:
                 fprintf(stderr, "ERROR: unknown flag -%c\n", *cp);
                 wantUsage = true;
@@ -252,7 +263,7 @@
         result = verify(argv[1], alignment, verbose);
     } else {
         /* create the new archive */
-        result = process(argv[1], argv[2], alignment, force);
+        result = process(argv[1], argv[2], alignment, force, zopfli);
 
         /* trust, but verify */
         if (result == 0)
diff --git a/tools/zipalign/ZipFile.cpp b/tools/zipalign/ZipFile.cpp
index 8057068..3c5ec15 100644
--- a/tools/zipalign/ZipFile.cpp
+++ b/tools/zipalign/ZipFile.cpp
@@ -28,6 +28,8 @@
 #include <zlib.h>
 #define DEF_MEM_LEVEL 8                // normally in zutil.h?
 
+#include "zopfli/deflate.h"
+
 #include <memory.h>
 #include <sys/stat.h>
 #include <errno.h>
@@ -638,6 +640,141 @@
 }
 
 /*
+ * Add an entry by copying it from another zip file, recompressing with
+ * Zopfli if already compressed.
+ *
+ * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
+ */
+status_t ZipFile::addRecompress(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
+    ZipEntry** ppEntry)
+{
+    ZipEntry* pEntry = NULL;
+    status_t result;
+    long lfhPosn, startPosn, endPosn, uncompressedLen;
+
+    if (mReadOnly)
+        return INVALID_OPERATION;
+
+    /* make sure we're in a reasonable state */
+    assert(mZipFp != NULL);
+    assert(mEntries.size() == mEOCD.mTotalNumEntries);
+
+    /* the new entry is written starting at the current central dir offset */
+    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
+        result = UNKNOWN_ERROR;
+        goto bail;
+    }
+
+    pEntry = new ZipEntry;
+    if (pEntry == NULL) {
+        result = NO_MEMORY;
+        goto bail;
+    }
+
+    result = pEntry->initFromExternal(pSourceZip, pSourceEntry);
+    if (result != NO_ERROR)
+        goto bail;
+
+    /* From here on out, failures are more interesting. */
+    mNeedCDRewrite = true;
+
+    /*
+     * Write the LFH, even though it's still mostly blank.  We need it as a
+     * place-holder; in theory it isn't necessary, but some utilities demand it.
+     */
+    lfhPosn = ftell(mZipFp);
+    if (lfhPosn < 0) {
+        result = UNKNOWN_ERROR;
+        goto bail;
+    }
+    pEntry->mLFH.write(mZipFp);
+
+    /*
+     * Transfer the data.  Compressed entries are inflated and deflated
+     * again; everything else is copied through unchanged.
+     */
+    if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
+    {
+        result = UNKNOWN_ERROR;
+        goto bail;
+    }
+
+    uncompressedLen = pSourceEntry->getUncompressedLen();
+
+    if (pSourceEntry->isCompressed()) {
+        void *buf = pSourceZip->uncompress(pSourceEntry);
+        if (buf == NULL) {
+            result = NO_MEMORY;
+            goto bail;
+        }
+        startPosn = ftell(mZipFp);
+        /* compressFpToFp() writes *pCRC32 on success; the value is unused */
+        unsigned long crc;
+        if (compressFpToFp(mZipFp, NULL, buf, uncompressedLen, &crc) != NO_ERROR) {
+            ALOGW("recompress of '%s' failed\n", pEntry->mCDE.mFileName);
+            result = UNKNOWN_ERROR;
+            free(buf);
+            goto bail;
+        }
+        free(buf);
+        endPosn = ftell(mZipFp);
+        if (startPosn < 0 || endPosn < 0) {
+            result = UNKNOWN_ERROR;
+            goto bail;
+        }
+        /* same uncompressed bytes, so the source entry's CRC is reused */
+        pEntry->setDataInfo(uncompressedLen, endPosn - startPosn,
+            pSourceEntry->getCRC32(), ZipEntry::kCompressDeflated);
+    } else {
+        /* copy the entry's bytes as-is, plus the data descriptor when flagged */
+        off_t copyLen = pSourceEntry->getCompressedLen();
+        if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
+            copyLen += ZipEntry::kDataDescriptorLen;
+
+        if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
+            != NO_ERROR)
+        {
+            ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
+            result = UNKNOWN_ERROR;
+            goto bail;
+        }
+    }
+
+    /* this is where the central directory will start from now on */
+    endPosn = ftell(mZipFp);
+    if (endPosn < 0) {
+        result = UNKNOWN_ERROR;
+        goto bail;
+    }
+
+    /*
+     * Go back and write the real LFH before touching mEOCD, so a failed
+     * seek leaves the entry counts and mEntries in agreement.
+     */
+    pEntry->setLFHOffset(lfhPosn);
+    if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
+        result = UNKNOWN_ERROR;
+        goto bail;
+    }
+    pEntry->mLFH.write(mZipFp);
+
+    /* Success!  Account for the new entry and hand it to the list. */
+    mEOCD.mNumEntries++;
+    mEOCD.mTotalNumEntries++;
+    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
+    mEOCD.mCentralDirOffset = endPosn;
+    mEntries.add(pEntry);
+    if (ppEntry != NULL)
+        *ppEntry = pEntry;
+    pEntry = NULL;
+    result = NO_ERROR;
+
+bail:
+    delete pEntry;
+    return result;
+}
+
+/*
  * Copy all of the bytes in "src" to "dst".
  *
  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
@@ -744,73 +881,43 @@
     const void* data, size_t size, unsigned long* pCRC32)
 {
     status_t result = NO_ERROR;
-    const size_t kBufSize = 32768;
+    const size_t kBufSize = 1024 * 1024;
     unsigned char* inBuf = NULL;
     unsigned char* outBuf = NULL;
-    z_stream zstream;
+    size_t outSize = 0;
     bool atEof = false;     // no feof() aviailable yet
     unsigned long crc;
-    int zerr;
+    ZopfliOptions options;
+    unsigned char bp = 0;
 
-    /*
-     * Create an input buffer and an output buffer.
-     */
-    inBuf = new unsigned char[kBufSize];
-    outBuf = new unsigned char[kBufSize];
-    if (inBuf == NULL || outBuf == NULL) {
-        result = NO_MEMORY;
-        goto bail;
-    }
-
-    /*
-     * Initialize the zlib stream.
-     */
-    memset(&zstream, 0, sizeof(zstream));
-    zstream.zalloc = Z_NULL;
-    zstream.zfree = Z_NULL;
-    zstream.opaque = Z_NULL;
-    zstream.next_in = NULL;
-    zstream.avail_in = 0;
-    zstream.next_out = outBuf;
-    zstream.avail_out = kBufSize;
-    zstream.data_type = Z_UNKNOWN;
-
-    zerr = deflateInit2(&zstream, Z_BEST_COMPRESSION,
-        Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
-    if (zerr != Z_OK) {
-        result = UNKNOWN_ERROR;
-        if (zerr == Z_VERSION_ERROR) {
-            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
-                ZLIB_VERSION);
-        } else {
-            ALOGD("Call to deflateInit2 failed (zerr=%d)\n", zerr);
-        }
-        goto bail;
-    }
+    ZopfliInitOptions(&options);
 
     crc = crc32(0L, Z_NULL, 0);
 
-    /*
-     * Loop while we have data.
-     */
-    do {
-        size_t getSize;
-        int flush;
+    if (data) {
+        crc = crc32(crc, (const unsigned char*)data, size);
+        ZopfliDeflate(&options, 2, true, (const unsigned char*)data, size, &bp,
+            &outBuf, &outSize);
+    } else {
+        /*
+         * Create an input buffer and an output buffer.
+         */
+        inBuf = new unsigned char[kBufSize];
+        if (inBuf == NULL) {
+            result = NO_MEMORY;
+            goto bail;
+        }
 
-        /* only read if the input buffer is empty */
-        if (zstream.avail_in == 0 && !atEof) {
-            ALOGV("+++ reading %d bytes\n", (int)kBufSize);
-            if (data) {
-                getSize = size > kBufSize ? kBufSize : size;
-                memcpy(inBuf, data, getSize);
-                data = ((const char*)data) + getSize;
-                size -= getSize;
-            } else {
-                getSize = fread(inBuf, 1, kBufSize, srcFp);
-                if (ferror(srcFp)) {
-                    ALOGD("deflate read failed (errno=%d)\n", errno);
-                    goto z_bail;
-                }
+        /*
+         * Loop while we have data.
+         */
+        do {
+            size_t getSize;
+            getSize = fread(inBuf, 1, kBufSize, srcFp);
+            if (ferror(srcFp)) {
+                ALOGD("deflate read failed (errno=%d)\n", errno);
+                delete[] inBuf;
+                goto bail;
             }
             if (getSize < kBufSize) {
                 ALOGV("+++  got %d bytes, EOF reached\n",
@@ -819,51 +926,21 @@
             }
 
             crc = crc32(crc, inBuf, getSize);
+            ZopfliDeflate(&options, 2, atEof, inBuf, getSize, &bp, &outBuf, &outSize);
+        } while (!atEof);
+        delete[] inBuf;
+    }
 
-            zstream.next_in = inBuf;
-            zstream.avail_in = getSize;
-        }
-
-        if (atEof)
-            flush = Z_FINISH;       /* tell zlib that we're done */
-        else
-            flush = Z_NO_FLUSH;     /* more to come! */
-
-        zerr = deflate(&zstream, flush);
-        if (zerr != Z_OK && zerr != Z_STREAM_END) {
-            ALOGD("zlib deflate call failed (zerr=%d)\n", zerr);
-            result = UNKNOWN_ERROR;
-            goto z_bail;
-        }
-
-        /* write when we're full or when we're done */
-        if (zstream.avail_out == 0 ||
-            (zerr == Z_STREAM_END && zstream.avail_out != (uInt) kBufSize))
-        {
-            ALOGV("+++ writing %d bytes\n", (int) (zstream.next_out - outBuf));
-            if (fwrite(outBuf, 1, zstream.next_out - outBuf, dstFp) !=
-                (size_t)(zstream.next_out - outBuf))
-            {
-                ALOGD("write %d failed in deflate\n",
-                    (int) (zstream.next_out - outBuf));
-                goto z_bail;
-            }
-
-            zstream.next_out = outBuf;
-            zstream.avail_out = kBufSize;
-        }
-    } while (zerr == Z_OK);
-
-    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
+    ALOGV("+++ writing %d bytes\n", (int)outSize);
+    if (fwrite(outBuf, 1, outSize, dstFp) != outSize) {
+        ALOGD("write %d failed in deflate\n", (int)outSize);
+        goto bail;
+    }
 
     *pCRC32 = crc;
 
-z_bail:
-    deflateEnd(&zstream);        /* free up any allocated structures */
-
 bail:
-    delete[] inBuf;
-    delete[] outBuf;
+    free(outBuf);
 
     return result;
 }
@@ -1148,7 +1225,7 @@
 #endif
 
 // free the memory when you're done
-void* ZipFile::uncompress(const ZipEntry* entry)
+void* ZipFile::uncompress(const ZipEntry* entry) const
 {
     size_t unlen = entry->getUncompressedLen();
     size_t clen = entry->getCompressedLen();
diff --git a/tools/zipalign/ZipFile.h b/tools/zipalign/ZipFile.h
index 7877550..b99cda5 100644
--- a/tools/zipalign/ZipFile.h
+++ b/tools/zipalign/ZipFile.h
@@ -127,6 +127,15 @@
         int padding, ZipEntry** ppEntry);
 
     /*
+     * Add an entry by copying it from another zip file, recompressing with
+     * Zopfli if already compressed.
+     *
+     * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
+     */
+    status_t addRecompress(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
+        ZipEntry** ppEntry);
+
+    /*
      * Mark an entry as having been removed.  It is not actually deleted
      * from the archive or our internal data structures until flush() is
      * called.
@@ -147,7 +156,7 @@
      */
     //bool uncompress(const ZipEntry* pEntry, void* buf) const;
     //bool uncompress(const ZipEntry* pEntry, FILE* fp) const;
-    void* uncompress(const ZipEntry* pEntry);
+    void* uncompress(const ZipEntry* pEntry) const;
 
     /*
      * Get an entry, by name.  Returns NULL if not found.