split soong_zip into a library and a binary

This makes soong_zip faster and easier to invoke from other Go
programs (such as multiproduct_kati).
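
A minimal sketch of the intended library use (hypothetical caller; the
argument values are illustrative, not taken from multiproduct_kati):

  err := zip.Run(zip.ZipArgs{
      FileArgs:         zip.FileArgs{{SourceFiles: []string{"a/b.txt"}}},
      OutputFilePath:   "out.zip",
      CompressionLevel: 5,
      NumParallelJobs:  runtime.NumCPU(),
  })
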
Bug: 67478260
Test: m -j
Change-Id: Idd2671a44290550197c88f53dd11a6dd39c85cc5
diff --git a/zip/zip.go b/zip/zip.go
new file mode 100644
index 0000000..95520fe
--- /dev/null
+++ b/zip/zip.go
@@ -0,0 +1,760 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package zip
+
+import (
+ "bytes"
+ "compress/flate"
+ "errors"
+ "fmt"
+ "hash/crc32"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime/pprof"
+ "runtime/trace"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+
+ "android/soong/jar"
+ "android/soong/third_party/zip"
+)
+
+// Block size used during parallel compression of a single file.
+const parallelBlockSize = 1 * 1024 * 1024 // 1MB
+
+// Minimum file size to use parallel compression. Parallel compression
+// requires more flate.Writer allocations, since we can't change the
+// dictionary during Reset.
+const minParallelFileSize = parallelBlockSize * 6
+
+// Size of the ZIP compression window (32KB)
+const windowSize = 32 * 1024
+
+type nopCloser struct {
+ io.Writer
+}
+
+func (nopCloser) Close() error {
+ return nil
+}
+
+type byteReaderCloser struct {
+ *bytes.Reader
+ io.Closer
+}
+
+type pathMapping struct {
+ dest, src string
+ zipMethod uint16
+}
+
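+// uniqueSet is a string set whose String and Set methods satisfy
+// flag.Value, so a binary wrapping this library can collect repeated
+// flag occurrences while rejecting duplicates.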
+type uniqueSet map[string]bool
+
+func (u *uniqueSet) String() string {
+ return `""`
+}
+
+func (u *uniqueSet) Set(s string) error {
+	if _, found := (*u)[s]; found {
+		return fmt.Errorf("File %q was specified twice as a file to not deflate", s)
+	}
+	(*u)[s] = true
+
+	return nil
+}
+
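+// FileArg is one set of files to add to the zip: each path in
+// SourceFiles (plus everything found under GlobDir, if it is set) is
+// stored in the zip at PathPrefixInZip joined with the source path
+// relative to SourcePrefixToStrip.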
+type FileArg struct {
+ PathPrefixInZip, SourcePrefixToStrip string
+ SourceFiles []string
+ GlobDir string
+}
+
+type FileArgs []FileArg
+
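+// ZipWriter writes a zip file in parallel: addFile and addManifest
+// queue entries on writeOps while write drains them in order, and the
+// rate limiters bound CPU use and buffered memory.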
+type ZipWriter struct {
+ time time.Time
+ createdFiles map[string]string
+ createdDirs map[string]string
+ directories bool
+
+ errors chan error
+ writeOps chan chan *zipEntry
+
+ cpuRateLimiter *CPURateLimiter
+ memoryRateLimiter *MemoryRateLimiter
+
+ compressorPool sync.Pool
+ compLevel int
+}
+
+type zipEntry struct {
+ fh *zip.FileHeader
+
+ // List of delayed io.Reader
+ futureReaders chan chan io.Reader
+
+	// Only used for passing into the MemoryRateLimiter to ensure we
+	// release as much memory as we request
+ allocatedSize int64
+}
+
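+// ZipArgs bundles everything Run needs to build one zip file.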
+type ZipArgs struct {
+ FileArgs FileArgs
+ OutputFilePath string
+ CpuProfileFilePath string
+ TraceFilePath string
+ EmulateJar bool
+ AddDirectoryEntriesToZip bool
+ CompressionLevel int
+ ManifestSourcePath string
+ NumParallelJobs int
+ NonDeflatedFiles map[string]bool
+}
+
+func Run(args ZipArgs) (err error) {
+	if args.CpuProfileFilePath != "" {
+		f, err := os.Create(args.CpuProfileFilePath)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			return err
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	if args.TraceFilePath != "" {
+		f, err := os.Create(args.TraceFilePath)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+		if err := trace.Start(f); err != nil {
+			return err
+		}
+		defer trace.Stop()
+	}
+
+ if args.OutputFilePath == "" {
+ return fmt.Errorf("output file path must be nonempty")
+ }
+
+ if args.EmulateJar {
+ args.AddDirectoryEntriesToZip = true
+ }
+
+ w := &ZipWriter{
+ time: jar.DefaultTime,
+ createdDirs: make(map[string]string),
+ createdFiles: make(map[string]string),
+ directories: args.AddDirectoryEntriesToZip,
+ compLevel: args.CompressionLevel,
+ }
+ pathMappings := []pathMapping{}
+
+ for _, fa := range args.FileArgs {
+ srcs := fa.SourceFiles
+ if fa.GlobDir != "" {
+ srcs = append(srcs, recursiveGlobFiles(fa.GlobDir)...)
+ }
+ for _, src := range srcs {
+ if err := fillPathPairs(fa.PathPrefixInZip,
+ fa.SourcePrefixToStrip, src, &pathMappings, args.NonDeflatedFiles); err != nil {
+				return err
+ }
+ }
+ }
+
+	return w.write(args.OutputFilePath, pathMappings, args.ManifestSourcePath, args.EmulateJar, args.NumParallelJobs)
+}
+
+func fillPathPairs(prefix, rel, src string, pathMappings *[]pathMapping, nonDeflatedFiles map[string]bool) error {
+ src = strings.TrimSpace(src)
+ if src == "" {
+ return nil
+ }
+ src = filepath.Clean(src)
+ dest, err := filepath.Rel(rel, src)
+ if err != nil {
+ return err
+ }
+ dest = filepath.Join(prefix, dest)
+
+ zipMethod := zip.Deflate
+ if _, found := nonDeflatedFiles[dest]; found {
+ zipMethod = zip.Store
+ }
+ *pathMappings = append(*pathMappings,
+ pathMapping{dest: dest, src: src, zipMethod: zipMethod})
+
+ return nil
+}
+
+func jarSort(mappings []pathMapping) {
+ less := func(i int, j int) (smaller bool) {
+ return jar.EntryNamesLess(mappings[i].dest, mappings[j].dest)
+ }
+ sort.SliceStable(mappings, less)
+}
+
+type readerSeekerCloser interface {
+ io.Reader
+ io.ReaderAt
+ io.Closer
+ io.Seeker
+}
+
+func (z *ZipWriter) write(out string, pathMappings []pathMapping, manifest string, emulateJar bool, parallelJobs int) (err error) {
+ f, err := os.Create(out)
+ if err != nil {
+ return err
+ }
+
+ defer f.Close()
+ defer func() {
+ if err != nil {
+ os.Remove(out)
+ }
+ }()
+
+ z.errors = make(chan error)
+ defer close(z.errors)
+
+	// This channel size can be essentially unlimited -- it's used as a
+	// fifo queue to decouple the CPU and IO loads. Directories don't
+	// require any compression time, but still cost some IO. Similarly,
+	// small files can be very fast to compress. Some files that are more
+	// difficult to compress won't take a correspondingly longer time to
+	// write out.
+	//
+	// The optimum size here depends on your CPU and IO characteristics,
+	// and the layout of your zip file. 1000 was chosen mostly at random
+	// as something that worked reasonably well for a test file.
+	//
+	// The rate limiters below put upper bounds on the number of
+	// parallel compressions and outstanding buffers.
+ z.writeOps = make(chan chan *zipEntry, 1000)
+ z.cpuRateLimiter = NewCPURateLimiter(int64(parallelJobs))
+ z.memoryRateLimiter = NewMemoryRateLimiter(0)
+ defer func() {
+ z.cpuRateLimiter.Stop()
+ z.memoryRateLimiter.Stop()
+ }()
+
+ if manifest != "" && !emulateJar {
+ return errors.New("must specify --jar when specifying a manifest via -m")
+ }
+
+ if emulateJar {
+ // manifest may be empty, in which case addManifest will fill in a default
+ pathMappings = append(pathMappings, pathMapping{jar.ManifestFile, manifest, zip.Deflate})
+
+ jarSort(pathMappings)
+ }
+
+ go func() {
+ var err error
+ defer close(z.writeOps)
+
+ for _, ele := range pathMappings {
+ if emulateJar && ele.dest == jar.ManifestFile {
+ err = z.addManifest(ele.dest, ele.src, ele.zipMethod)
+ } else {
+ err = z.addFile(ele.dest, ele.src, ele.zipMethod, emulateJar)
+ }
+ if err != nil {
+ z.errors <- err
+ return
+ }
+ }
+ }()
+
+ zipw := zip.NewWriter(f)
+
+ var currentWriteOpChan chan *zipEntry
+ var currentWriter io.WriteCloser
+ var currentReaders chan chan io.Reader
+ var currentReader chan io.Reader
+ var done bool
+
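+	// State machine: besides the always-armed error channel, at most one
+	// of the cases below is armed at a time (the others are nil), so
+	// compressed entries are copied out in the exact order their write
+	// ops were enqueued, even though compression runs in parallel.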
+ for !done {
+ var writeOpsChan chan chan *zipEntry
+ var writeOpChan chan *zipEntry
+ var readersChan chan chan io.Reader
+
+ if currentReader != nil {
+ // Only read and process errors
+ } else if currentReaders != nil {
+ readersChan = currentReaders
+ } else if currentWriteOpChan != nil {
+ writeOpChan = currentWriteOpChan
+ } else {
+ writeOpsChan = z.writeOps
+ }
+
+ select {
+ case writeOp, ok := <-writeOpsChan:
+ if !ok {
+ done = true
+ }
+
+ currentWriteOpChan = writeOp
+
+ case op := <-writeOpChan:
+ currentWriteOpChan = nil
+
+ if op.fh.Method == zip.Deflate {
+ currentWriter, err = zipw.CreateCompressedHeader(op.fh)
+ } else {
+ var zw io.Writer
+
+ op.fh.CompressedSize64 = op.fh.UncompressedSize64
+
+ zw, err = zipw.CreateHeaderAndroid(op.fh)
+ currentWriter = nopCloser{zw}
+ }
+ if err != nil {
+ return err
+ }
+
+ currentReaders = op.futureReaders
+ if op.futureReaders == nil {
+ currentWriter.Close()
+ currentWriter = nil
+ }
+ z.memoryRateLimiter.Finish(op.allocatedSize)
+
+ case futureReader, ok := <-readersChan:
+ if !ok {
+ // Done with reading
+ currentWriter.Close()
+ currentWriter = nil
+ currentReaders = nil
+ }
+
+ currentReader = futureReader
+
+ case reader := <-currentReader:
+ _, err = io.Copy(currentWriter, reader)
+ if err != nil {
+ return err
+ }
+
+ currentReader = nil
+
+ case err = <-z.errors:
+ return err
+ }
+ }
+
+	// One last chance to catch an error
+	select {
+	case err = <-z.errors:
+		return err
+	default:
+		return zipw.Close()
+	}
+}
+
+// addFile imports <src> into the zip at sub-path <dest>, compressing it
+// with the given method.
+func (z *ZipWriter) addFile(dest, src string, method uint16, emulateJar bool) error {
+ var fileSize int64
+ var executable bool
+
+ if s, err := os.Lstat(src); err != nil {
+ return err
+ } else if s.IsDir() {
+ if z.directories {
+ return z.writeDirectory(dest, src, emulateJar)
+ }
+ return nil
+ } else {
+ if err := z.writeDirectory(filepath.Dir(dest), src, emulateJar); err != nil {
+ return err
+ }
+
+ if prev, exists := z.createdDirs[dest]; exists {
+ return fmt.Errorf("destination %q is both a directory %q and a file %q", dest, prev, src)
+ }
+ if prev, exists := z.createdFiles[dest]; exists {
+ return fmt.Errorf("destination %q has two files %q and %q", dest, prev, src)
+ }
+
+ z.createdFiles[dest] = src
+
+ if s.Mode()&os.ModeSymlink != 0 {
+ return z.writeSymlink(dest, src)
+ } else if !s.Mode().IsRegular() {
+ return fmt.Errorf("%s is not a file, directory, or symlink", src)
+ }
+
+ fileSize = s.Size()
+ executable = s.Mode()&0100 != 0
+ }
+
+ r, err := os.Open(src)
+ if err != nil {
+ return err
+ }
+
+ header := &zip.FileHeader{
+ Name: dest,
+ Method: method,
+ UncompressedSize64: uint64(fileSize),
+ }
+
+ if executable {
+ header.SetMode(0700)
+ }
+
+ return z.writeFileContents(header, r)
+}
+
+func (z *ZipWriter) addManifest(dest string, src string, method uint16) error {
+ if prev, exists := z.createdDirs[dest]; exists {
+ return fmt.Errorf("destination %q is both a directory %q and a file %q", dest, prev, src)
+ }
+ if prev, exists := z.createdFiles[dest]; exists {
+ return fmt.Errorf("destination %q has two files %q and %q", dest, prev, src)
+ }
+
+ if err := z.writeDirectory(filepath.Dir(dest), src, true); err != nil {
+ return err
+ }
+
+ fh, buf, err := jar.ManifestFileContents(src)
+ if err != nil {
+ return err
+ }
+
+ reader := &byteReaderCloser{bytes.NewReader(buf), ioutil.NopCloser(nil)}
+
+ return z.writeFileContents(fh, reader)
+}
+
+func (z *ZipWriter) writeFileContents(header *zip.FileHeader, r readerSeekerCloser) (err error) {
+
+ header.SetModTime(z.time)
+
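+	// Claim this entry's slot in the write queue up front so that
+	// entries land in the output zip in submission order, even when
+	// later (smaller) files finish compressing first.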
+ compressChan := make(chan *zipEntry, 1)
+ z.writeOps <- compressChan
+
+ // Pre-fill a zipEntry, it will be sent in the compressChan once
+ // we're sure about the Method and CRC.
+ ze := &zipEntry{
+ fh: header,
+ }
+
+ ze.allocatedSize = int64(header.UncompressedSize64)
+ z.cpuRateLimiter.Request()
+ z.memoryRateLimiter.Request(ze.allocatedSize)
+
+ fileSize := int64(header.UncompressedSize64)
+ if fileSize == 0 {
+ fileSize = int64(header.UncompressedSize)
+ }
+
+ if header.Method == zip.Deflate && fileSize >= minParallelFileSize {
+ wg := new(sync.WaitGroup)
+
+		// Allocate enough buffer to hold all readers. We'll limit
+		// this based on actual buffer sizes in the memory rate limiter.
+ ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
+
+ // Calculate the CRC in the background, since reading the entire
+ // file could take a while.
+ //
+ // We could split this up into chunks as well, but it's faster
+ // than the compression. Due to the Go Zip API, we also need to
+ // know the result before we can begin writing the compressed
+ // data out to the zipfile.
+ wg.Add(1)
+ go z.crcFile(r, ze, compressChan, wg)
+
+ for start := int64(0); start < fileSize; start += parallelBlockSize {
+ sr := io.NewSectionReader(r, start, parallelBlockSize)
+ resultChan := make(chan io.Reader, 1)
+ ze.futureReaders <- resultChan
+
+ z.cpuRateLimiter.Request()
+
+			last := start+parallelBlockSize >= fileSize
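+			// Compress each block (except the first) with the
+			// preceding 32KB as a preset dictionary, so that
+			// back-references across block boundaries still resolve
+			// once the blocks are concatenated.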
+ var dict []byte
+ if start >= windowSize {
+ dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
+ if err != nil {
+ return err
+ }
+ }
+
+ wg.Add(1)
+ go z.compressPartialFile(sr, dict, last, resultChan, wg)
+ }
+
+ close(ze.futureReaders)
+
+ // Close the file handle after all readers are done
+ go func(wg *sync.WaitGroup, closer io.Closer) {
+ wg.Wait()
+ closer.Close()
+ }(wg, r)
+ } else {
+ go func() {
+ z.compressWholeFile(ze, r, compressChan)
+ r.Close()
+ }()
+ }
+
+ return nil
+}
+
+func (z *ZipWriter) crcFile(r io.Reader, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) {
+ defer wg.Done()
+ defer z.cpuRateLimiter.Finish()
+
+ crc := crc32.NewIEEE()
+ _, err := io.Copy(crc, r)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+
+ ze.fh.CRC32 = crc.Sum32()
+ resultChan <- ze
+ close(resultChan)
+}
+
+func (z *ZipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, resultChan chan io.Reader, wg *sync.WaitGroup) {
+ defer wg.Done()
+
+ result, err := z.compressBlock(r, dict, last)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+
+ z.cpuRateLimiter.Finish()
+
+ resultChan <- result
+}
+
+func (z *ZipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
+	buf := new(bytes.Buffer)
+	var fw *flate.Writer
+	var err error
+	if len(dict) > 0 {
+		// There's no way to Reset a Writer with a new dictionary, so
+		// don't use the Pool
+		fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		var ok bool
+		if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
+			fw.Reset(buf)
+		} else {
+			fw, err = flate.NewWriter(buf, z.compLevel)
+			if err != nil {
+				return nil, err
+			}
+		}
+		defer z.compressorPool.Put(fw)
+	}
+
+ _, err = io.Copy(fw, r)
+ if err != nil {
+ return nil, err
+ }
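+	// Close terminates the deflate stream; Flush only emits an empty
+	// stored block as a sync marker, leaving the stream open for the
+	// blocks that follow.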
+ if last {
+ fw.Close()
+ } else {
+ fw.Flush()
+ }
+
+ return buf, nil
+}
+
+func (z *ZipWriter) compressWholeFile(ze *zipEntry, r io.ReadSeeker, compressChan chan *zipEntry) {
+
+ crc := crc32.NewIEEE()
+ _, err := io.Copy(crc, r)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+
+ ze.fh.CRC32 = crc.Sum32()
+
+	_, err = r.Seek(0, io.SeekStart)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+
+ readFile := func(reader io.ReadSeeker) ([]byte, error) {
+		_, err := reader.Seek(0, io.SeekStart)
+ if err != nil {
+ return nil, err
+ }
+
+ buf, err := ioutil.ReadAll(reader)
+ if err != nil {
+ return nil, err
+ }
+
+ return buf, nil
+ }
+
+ ze.futureReaders = make(chan chan io.Reader, 1)
+ futureReader := make(chan io.Reader, 1)
+ ze.futureReaders <- futureReader
+ close(ze.futureReaders)
+
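+	// Keep the deflated result only if it is actually smaller than the
+	// input; otherwise fall back to storing the file uncompressed.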
+ if ze.fh.Method == zip.Deflate {
+ compressed, err := z.compressBlock(r, nil, true)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+ if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
+ futureReader <- compressed
+ } else {
+ buf, err := readFile(r)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+ ze.fh.Method = zip.Store
+ futureReader <- bytes.NewReader(buf)
+ }
+ } else {
+ buf, err := readFile(r)
+ if err != nil {
+ z.errors <- err
+ return
+ }
+ ze.fh.Method = zip.Store
+ futureReader <- bytes.NewReader(buf)
+ }
+
+ z.cpuRateLimiter.Finish()
+
+ close(futureReader)
+
+ compressChan <- ze
+ close(compressChan)
+}
+
+// writeDirectory annotates that dir is a directory created for the src file or directory, and adds
+// the directory entry to the zip file if directories are enabled.
+func (z *ZipWriter) writeDirectory(dir string, src string, emulateJar bool) error {
+ // clean the input
+ dir = filepath.Clean(dir)
+
+ // discover any uncreated directories in the path
+ zipDirs := []string{}
+ for dir != "" && dir != "." {
+ if _, exists := z.createdDirs[dir]; exists {
+ break
+ }
+
+ if prev, exists := z.createdFiles[dir]; exists {
+ return fmt.Errorf("destination %q is both a directory %q and a file %q", dir, src, prev)
+ }
+
+ z.createdDirs[dir] = src
+ // parent directories precede their children
+ zipDirs = append([]string{dir}, zipDirs...)
+
+ dir = filepath.Dir(dir)
+ }
+
+ if z.directories {
+ // make a directory entry for each uncreated directory
+ for _, cleanDir := range zipDirs {
+ var dirHeader *zip.FileHeader
+
+ if emulateJar && cleanDir+"/" == jar.MetaDir {
+ dirHeader = jar.MetaDirFileHeader()
+ } else {
+ dirHeader = &zip.FileHeader{
+ Name: cleanDir + "/",
+ }
+ dirHeader.SetMode(0700 | os.ModeDir)
+ }
+
+ dirHeader.SetModTime(z.time)
+
+ ze := make(chan *zipEntry, 1)
+ ze <- &zipEntry{
+ fh: dirHeader,
+ }
+ close(ze)
+ z.writeOps <- ze
+ }
+ }
+
+ return nil
+}
+
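+// writeSymlink stores the symlink at file as a zip entry named rel
+// whose contents are the link target, with ModeSymlink set in the
+// entry's mode bits (the usual zip encoding for symlinks).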
+func (z *ZipWriter) writeSymlink(rel, file string) error {
+ fileHeader := &zip.FileHeader{
+ Name: rel,
+ }
+ fileHeader.SetModTime(z.time)
+ fileHeader.SetMode(0700 | os.ModeSymlink)
+
+ dest, err := os.Readlink(file)
+ if err != nil {
+ return err
+ }
+
+ ze := make(chan *zipEntry, 1)
+ futureReaders := make(chan chan io.Reader, 1)
+ futureReader := make(chan io.Reader, 1)
+ futureReaders <- futureReader
+ close(futureReaders)
+ futureReader <- bytes.NewBufferString(dest)
+ close(futureReader)
+
+ ze <- &zipEntry{
+ fh: fileHeader,
+ futureReaders: futureReaders,
+ }
+ close(ze)
+ z.writeOps <- ze
+
+ return nil
+}
+
+func recursiveGlobFiles(path string) []string {
+	var files []string
+	filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
+		// Propagate walk errors; info is nil when err is non-nil, so
+		// dereferencing it here would panic.
+		if err != nil {
+			return err
+		}
+		if !info.IsDir() {
+			files = append(files, path)
+		}
+		return nil
+	})
+
+	return files
+}