Merge "Allow reading zipfile list from a file."
diff --git a/cmd/merge_zips/Android.bp b/cmd/merge_zips/Android.bp
index ab658fd..f70c86e 100644
--- a/cmd/merge_zips/Android.bp
+++ b/cmd/merge_zips/Android.bp
@@ -18,6 +18,7 @@
       "android-archive-zip",
       "blueprint-pathtools",
       "soong-jar",
+      "soong-zip",
     ],
     srcs: [
         "merge_zips.go",
diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go
index 68fe259..27179cb 100644
--- a/cmd/merge_zips/merge_zips.go
+++ b/cmd/merge_zips/merge_zips.go
@@ -30,8 +30,566 @@
 
 	"android/soong/jar"
 	"android/soong/third_party/zip"
+	soongZip "android/soong/zip"
 )
 
+// Input zip: we can open it, close it, and obtain an array of entries
+type InputZip interface {
+	Name() string
+	Open() error
+	Close() error
+	Entries() []*zip.File
+	IsOpen() bool
+}
+
+// An entry that can be written to the output zip
+type ZipEntryContents interface {
+	String() string
+	IsDir() bool
+	CRC32() uint32
+	Size() uint64
+	WriteToZip(dest string, zw *zip.Writer) error
+}
+
+// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip
+// identified by the input zip and the index of the entry in its entries array
+type ZipEntryFromZip struct {
+	inputZip InputZip
+	index    int
+	name     string
+	isDir    bool
+	crc32    uint32
+	size     uint64
+}
+
+func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip {
+	fi := inputZip.Entries()[entryIndex]
+	newEntry := ZipEntryFromZip{inputZip: inputZip,
+		index: entryIndex,
+		name:  fi.Name,
+		isDir: fi.FileInfo().IsDir(),
+		crc32: fi.CRC32,
+		size:  fi.UncompressedSize64,
+	}
+	return &newEntry
+}
+
+func (ze ZipEntryFromZip) String() string {
+	return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name)
+}
+
+func (ze ZipEntryFromZip) IsDir() bool {
+	return ze.isDir
+}
+
+func (ze ZipEntryFromZip) CRC32() uint32 {
+	return ze.crc32
+}
+
+func (ze ZipEntryFromZip) Size() uint64 {
+	return ze.size
+}
+
+func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error {
+	if err := ze.inputZip.Open(); err != nil {
+		return err
+	}
+	return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest)
+}
+
+// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte
+type ZipEntryFromBuffer struct {
+	fh      *zip.FileHeader
+	content []byte
+}
+
+func (be ZipEntryFromBuffer) String() string {
+	return "internal buffer"
+}
+
+func (be ZipEntryFromBuffer) IsDir() bool {
+	return be.fh.FileInfo().IsDir()
+}
+
+func (be ZipEntryFromBuffer) CRC32() uint32 {
+	return crc32.ChecksumIEEE(be.content)
+}
+
+func (be ZipEntryFromBuffer) Size() uint64 {
+	return uint64(len(be.content))
+}
+
+func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error {
+	w, err := zw.CreateHeader(be.fh)
+	if err != nil {
+		return err
+	}
+
+	if !be.IsDir() {
+		_, err = w.Write(be.content)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Processing state.
+type OutputZip struct {
+	outputWriter     *zip.Writer
+	stripDirEntries  bool
+	emulateJar       bool
+	sortEntries      bool
+	ignoreDuplicates bool
+	excludeDirs      []string
+	excludeFiles     []string
+	sourceByDest     map[string]ZipEntryContents
+}
+
+func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip {
+	return &OutputZip{
+		outputWriter:     outputWriter,
+		stripDirEntries:  stripDirEntries,
+		emulateJar:       emulateJar,
+		sortEntries:      sortEntries,
+		sourceByDest:     make(map[string]ZipEntryContents, 0),
+		ignoreDuplicates: ignoreDuplicates,
+	}
+}
+
+func (oz *OutputZip) setExcludeDirs(excludeDirs []string) {
+	oz.excludeDirs = make([]string, len(excludeDirs))
+	for i, dir := range excludeDirs {
+		oz.excludeDirs[i] = filepath.Clean(dir)
+	}
+}
+
+func (oz *OutputZip) setExcludeFiles(excludeFiles []string) {
+	oz.excludeFiles = excludeFiles
+}
+
+// Adds an entry with given name whose source is given ZipEntryContents. Returns old ZipEntryContents
+// if entry with given name already exists.
+func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) {
+	if existingSource, exists := oz.sourceByDest[name]; exists {
+		return existingSource, nil
+	}
+	oz.sourceByDest[name] = source
+	// Delay writing an entry if entries need to be rearranged.
+	if oz.emulateJar || oz.sortEntries {
+		return nil, nil
+	}
+	return nil, source.WriteToZip(name, oz.outputWriter)
+}
+
+// Adds an entry for the manifest (META-INF/MANIFEST.MF from the given file
+func (oz *OutputZip) addManifest(manifestPath string) error {
+	if !oz.stripDirEntries {
+		if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil {
+			return err
+		}
+	}
+	contents, err := ioutil.ReadFile(manifestPath)
+	if err == nil {
+		fh, buf, err := jar.ManifestFileContents(contents)
+		if err == nil {
+			_, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf})
+		}
+	}
+	return err
+}
+
+// Adds an entry with given name and contents read from given file
+func (oz *OutputZip) addZipEntryFromFile(name string, path string) error {
+	buf, err := ioutil.ReadFile(path)
+	if err == nil {
+		fh := &zip.FileHeader{
+			Name:               name,
+			Method:             zip.Store,
+			UncompressedSize64: uint64(len(buf)),
+		}
+		fh.SetMode(0700)
+		fh.SetModTime(jar.DefaultTime)
+		_, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf})
+	}
+	return err
+}
+
+func (oz *OutputZip) addEmptyEntry(entry string) error {
+	var emptyBuf []byte
+	fh := &zip.FileHeader{
+		Name:               entry,
+		Method:             zip.Store,
+		UncompressedSize64: uint64(len(emptyBuf)),
+	}
+	fh.SetMode(0700)
+	fh.SetModTime(jar.DefaultTime)
+	_, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf})
+	return err
+}
+
+// Returns true if given entry is to be excluded
+func (oz *OutputZip) isEntryExcluded(name string) bool {
+	for _, dir := range oz.excludeDirs {
+		dir = filepath.Clean(dir)
+		patterns := []string{
+			dir + "/",      // the directory itself
+			dir + "/**/*",  // files recursively in the directory
+			dir + "/**/*/", // directories recursively in the directory
+		}
+
+		for _, pattern := range patterns {
+			match, err := pathtools.Match(pattern, name)
+			if err != nil {
+				panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+			}
+			if match {
+				if oz.emulateJar {
+					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
+					// requested.
+					// TODO(ccross): which files does this affect?
+					if name != jar.MetaDir && name != jar.ManifestFile {
+						return true
+					}
+				}
+				return true
+			}
+		}
+	}
+
+	for _, pattern := range oz.excludeFiles {
+		match, err := pathtools.Match(pattern, name)
+		if err != nil {
+			panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+		}
+		if match {
+			return true
+		}
+	}
+	return false
+}
+
+// Creates a zip entry whose contents is an entry from the given input zip.
+func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error {
+	entry := NewZipEntryFromZip(inputZip, index)
+	if oz.stripDirEntries && entry.IsDir() {
+		return nil
+	}
+	existingEntry, err := oz.addZipEntry(entry.name, entry)
+	if err != nil {
+		return err
+	}
+	if existingEntry == nil {
+		return nil
+	}
+
+	// File types should match
+	if existingEntry.IsDir() != entry.IsDir() {
+		return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
+			entry.name, existingEntry, entry)
+	}
+
+	if oz.ignoreDuplicates ||
+		// Skip manifest and module info files that are not from the first input file
+		(oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) ||
+		// Identical entries
+		(existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) ||
+		// Directory entries
+		entry.IsDir() {
+		return nil
+	}
+
+	return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name())
+}
+
+func (oz *OutputZip) entriesArray() []string {
+	entries := make([]string, len(oz.sourceByDest))
+	i := 0
+	for entry := range oz.sourceByDest {
+		entries[i] = entry
+		i++
+	}
+	return entries
+}
+
+func (oz *OutputZip) jarSorted() []string {
+	entries := oz.entriesArray()
+	sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) })
+	return entries
+}
+
+func (oz *OutputZip) alphanumericSorted() []string {
+	entries := oz.entriesArray()
+	sort.Strings(entries)
+	return entries
+}
+
+func (oz *OutputZip) writeEntries(entries []string) error {
+	for _, entry := range entries {
+		source, _ := oz.sourceByDest[entry]
+		if err := source.WriteToZip(entry, oz.outputWriter); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
+	// the runfiles packages needs to be populated with "__init__.py".
+	// the runfiles dirs have been treated as packages.
+	allPackages := make(map[string]bool)
+	initedPackages := make(map[string]bool)
+	getPackage := func(path string) string {
+		ret := filepath.Dir(path)
+		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
+		if ret == "." || ret == "/" {
+			return ""
+		}
+		return ret
+	}
+
+	// put existing __init__.py files to a set first. This set is used for preventing
+	// generated __init__.py files from overwriting existing ones.
+	for _, inputZip := range inputZips {
+		if err := inputZip.Open(); err != nil {
+			return nil, err
+		}
+		for _, file := range inputZip.Entries() {
+			pyPkg := getPackage(file.Name)
+			if filepath.Base(file.Name) == "__init__.py" {
+				if _, found := initedPackages[pyPkg]; found {
+					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name))
+				}
+				initedPackages[pyPkg] = true
+			}
+			for pyPkg != "" {
+				if _, found := allPackages[pyPkg]; found {
+					break
+				}
+				allPackages[pyPkg] = true
+				pyPkg = getPackage(pyPkg)
+			}
+		}
+	}
+	noInitPackages := make([]string, 0)
+	for pyPkg := range allPackages {
+		if _, found := initedPackages[pyPkg]; !found {
+			noInitPackages = append(noInitPackages, pyPkg)
+		}
+	}
+	return noInitPackages, nil
+}
+
+// An InputZip owned by the InputZipsManager. Opened ManagedInputZip's are chained in the open order.
+type ManagedInputZip struct {
+	owner        *InputZipsManager
+	realInputZip InputZip
+	older        *ManagedInputZip
+	newer        *ManagedInputZip
+}
+
+// Maintains the array of ManagedInputZips, keeping track of open input ones. When an InputZip is opened,
+// may close some other InputZip to limit the number of open ones.
+type InputZipsManager struct {
+	inputZips     []*ManagedInputZip
+	nOpenZips     int
+	maxOpenZips   int
+	openInputZips *ManagedInputZip
+}
+
+func (miz *ManagedInputZip) unlink() {
+	olderMiz := miz.older
+	newerMiz := miz.newer
+	if newerMiz.older != miz || olderMiz.newer != miz {
+		panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v",
+			miz, miz, newerMiz, newerMiz, olderMiz, olderMiz))
+	}
+	olderMiz.newer = newerMiz
+	newerMiz.older = olderMiz
+	miz.newer = nil
+	miz.older = nil
+}
+
+func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) {
+	if olderMiz.newer != nil || olderMiz.older != nil {
+		panic(fmt.Errorf("inputZip is already open"))
+	}
+	oldOlderMiz := miz.older
+	if oldOlderMiz.newer != miz {
+		panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, oldOlderMiz))
+	}
+	miz.older = olderMiz
+	olderMiz.older = oldOlderMiz
+	oldOlderMiz.newer = olderMiz
+	olderMiz.newer = miz
+}
+
+func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager {
+	if maxOpenZips < 3 {
+		panic(fmt.Errorf("open zips limit should be above 3"))
+	}
+	// In the dummy element .older points to the most recently opened InputZip, and .newer points to the oldest.
+	head := new(ManagedInputZip)
+	head.older = head
+	head.newer = head
+	return &InputZipsManager{
+		inputZips:     make([]*ManagedInputZip, 0, nInputZips),
+		maxOpenZips:   maxOpenZips,
+		openInputZips: head,
+	}
+}
+
+// InputZip factory
+func (izm *InputZipsManager) Manage(inz InputZip) InputZip {
+	iz := &ManagedInputZip{owner: izm, realInputZip: inz}
+	izm.inputZips = append(izm.inputZips, iz)
+	return iz
+}
+
+// Opens or reopens ManagedInputZip.
+func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error {
+	if miz.realInputZip.IsOpen() {
+		if miz != izm.openInputZips {
+			miz.unlink()
+			izm.openInputZips.link(miz)
+		}
+		return nil
+	}
+	if izm.nOpenZips >= izm.maxOpenZips {
+		if err := izm.close(izm.openInputZips.older); err != nil {
+			return err
+		}
+	}
+	if err := miz.realInputZip.Open(); err != nil {
+		return err
+	}
+	izm.openInputZips.link(miz)
+	izm.nOpenZips++
+	return nil
+}
+
+func (izm *InputZipsManager) close(miz *ManagedInputZip) error {
+	if miz.IsOpen() {
+		err := miz.realInputZip.Close()
+		izm.nOpenZips--
+		miz.unlink()
+		return err
+	}
+	return nil
+}
+
+// Checks that openInputZips deque is valid
+func (izm *InputZipsManager) checkOpenZipsDeque() {
+	nReallyOpen := 0
+	el := izm.openInputZips
+	for {
+		elNext := el.older
+		if elNext.newer != el {
+			panic(fmt.Errorf("Element:\n  %p: %v\nNext:\n  %p %v", el, el, elNext, elNext))
+		}
+		if elNext == izm.openInputZips {
+			break
+		}
+		el = elNext
+		if !el.IsOpen() {
+			panic(fmt.Errorf("Found unopened element"))
+		}
+		nReallyOpen++
+		if nReallyOpen > izm.nOpenZips {
+			panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+		}
+	}
+	if nReallyOpen > izm.nOpenZips {
+		panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+	}
+}
+
+func (miz *ManagedInputZip) Name() string {
+	return miz.realInputZip.Name()
+}
+
+func (miz *ManagedInputZip) Open() error {
+	return miz.owner.reopen(miz)
+}
+
+func (miz *ManagedInputZip) Close() error {
+	return miz.owner.close(miz)
+}
+
+func (miz *ManagedInputZip) IsOpen() bool {
+	return miz.realInputZip.IsOpen()
+}
+
+func (miz *ManagedInputZip) Entries() []*zip.File {
+	if !miz.IsOpen() {
+		panic(fmt.Errorf("%s: is not open", miz.Name()))
+	}
+	return miz.realInputZip.Entries()
+}
+
+// Actual processing.
+func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string,
+	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
+	excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error {
+
+	out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates)
+	out.setExcludeFiles(excludeFiles)
+	out.setExcludeDirs(excludeDirs)
+	if manifest != "" {
+		if err := out.addManifest(manifest); err != nil {
+			return err
+		}
+	}
+	if pyMain != "" {
+		if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil {
+			return err
+		}
+	}
+
+	if emulatePar {
+		noInitPackages, err := out.getUninitializedPythonPackages(inputZips)
+		if err != nil {
+			return err
+		}
+		for _, uninitializedPyPackage := range noInitPackages {
+			if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Finally, add entries from all the input zips.
+	for _, inputZip := range inputZips {
+		_, copyFully := zipsToNotStrip[inputZip.Name()]
+		if err := inputZip.Open(); err != nil {
+			return err
+		}
+
+		for i, entry := range inputZip.Entries() {
+			if copyFully || !out.isEntryExcluded(entry.Name) {
+				if err := out.copyEntry(inputZip, i); err != nil {
+					return err
+				}
+			}
+		}
+		// Unless we need to rearrange the entries, the input zip can now be closed.
+		if !(emulateJar || sortEntries) {
+			if err := inputZip.Close(); err != nil {
+				return err
+			}
+		}
+	}
+
+	if emulateJar {
+		return out.writeEntries(out.jarSorted())
+	} else if sortEntries {
+		return out.writeEntries(out.alphanumericSorted())
+	}
+	return nil
+}
+
+// Process command line
 type fileList []string
 
 func (f *fileList) String() string {
@@ -50,9 +608,8 @@
 	return `""`
 }
 
-func (s zipsToNotStripSet) Set(zip_path string) error {
-	s[zip_path] = true
-
+func (s zipsToNotStripSet) Set(path string) error {
+	s[path] = true
 	return nil
 }
 
@@ -60,8 +617,8 @@
 	sortEntries      = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
 	emulateJar       = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
 	emulatePar       = flag.Bool("p", false, "merge zip entries based on par format")
-	stripDirs        fileList
-	stripFiles       fileList
+	excludeDirs      fileList
+	excludeFiles     fileList
 	zipsToNotStrip   = make(zipsToNotStripSet)
 	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
 	manifest         = flag.String("m", "", "manifest file to insert in jar")
@@ -71,14 +628,52 @@
 )
 
 func init() {
-	flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
-	flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
+	flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
+	flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
 	flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
 }
 
+type FileInputZip struct {
+	name   string
+	reader *zip.ReadCloser
+}
+
+func (fiz *FileInputZip) Name() string {
+	return fiz.name
+}
+
+func (fiz *FileInputZip) Close() error {
+	if fiz.IsOpen() {
+		reader := fiz.reader
+		fiz.reader = nil
+		return reader.Close()
+	}
+	return nil
+}
+
+func (fiz *FileInputZip) Entries() []*zip.File {
+	if !fiz.IsOpen() {
+		panic(fmt.Errorf("%s: is not open", fiz.Name()))
+	}
+	return fiz.reader.File
+}
+
+func (fiz *FileInputZip) IsOpen() bool {
+	return fiz.reader != nil
+}
+
+func (fiz *FileInputZip) Open() error {
+	if fiz.IsOpen() {
+		return nil
+	}
+	var err error
+	fiz.reader, err = zip.OpenReader(fiz.Name())
+	return err
+}
+
 func main() {
 	flag.Usage = func() {
-		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]")
+		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]")
 		flag.PrintDefaults()
 	}
 
@@ -90,16 +685,28 @@
 		os.Exit(1)
 	}
 	outputPath := args[0]
-	inputs := args[1:]
+	inputs := make([]string, 0)
+	for _, input := range args[1:] {
+		if input[0] == '@' {
+			bytes, err := ioutil.ReadFile(input[1:])
+			if err != nil {
+				log.Fatal(err)
+			}
+			inputs = append(inputs, soongZip.ReadRespFile(bytes)...)
+			continue
+		}
+		inputs = append(inputs, input)
+		continue
+	}
 
 	log.SetFlags(log.Lshortfile)
 
 	// make writer
-	output, err := os.Create(outputPath)
+	outputZip, err := os.Create(outputPath)
 	if err != nil {
 		log.Fatal(err)
 	}
-	defer output.Close()
+	defer outputZip.Close()
 
 	var offset int64
 	if *prefix != "" {
@@ -107,13 +714,13 @@
 		if err != nil {
 			log.Fatal(err)
 		}
-		offset, err = io.Copy(output, prefixFile)
+		offset, err = io.Copy(outputZip, prefixFile)
 		if err != nil {
 			log.Fatal(err)
 		}
 	}
 
-	writer := zip.NewWriter(output)
+	writer := zip.NewWriter(outputZip)
 	defer func() {
 		err := writer.Close()
 		if err != nil {
@@ -122,18 +729,6 @@
 	}()
 	writer.SetOffset(offset)
 
-	// make readers
-	readers := []namedZipReader{}
-	for _, input := range inputs {
-		reader, err := zip.OpenReader(input)
-		if err != nil {
-			log.Fatal(err)
-		}
-		defer reader.Close()
-		namedReader := namedZipReader{path: input, reader: &reader.Reader}
-		readers = append(readers, namedReader)
-	}
-
 	if *manifest != "" && !*emulateJar {
 		log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
 	}
@@ -143,344 +738,15 @@
 	}
 
 	// do merge
-	err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
-		*stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
+	inputZipsManager := NewInputZipsManager(len(inputs), 1000)
+	inputZips := make([]InputZip, len(inputs))
+	for i, input := range inputs {
+		inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input})
+	}
+	err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
+		*stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs),
+		map[string]bool(zipsToNotStrip))
 	if err != nil {
 		log.Fatal(err)
 	}
 }
-
-// a namedZipReader reads a .zip file and can say which file it's reading
-type namedZipReader struct {
-	path   string
-	reader *zip.Reader
-}
-
-// a zipEntryPath refers to a file contained in a zip
-type zipEntryPath struct {
-	zipName   string
-	entryName string
-}
-
-func (p zipEntryPath) String() string {
-	return p.zipName + "/" + p.entryName
-}
-
-// a zipEntry is a zipSource that pulls its content from another zip
-type zipEntry struct {
-	path    zipEntryPath
-	content *zip.File
-}
-
-func (ze zipEntry) String() string {
-	return ze.path.String()
-}
-
-func (ze zipEntry) IsDir() bool {
-	return ze.content.FileInfo().IsDir()
-}
-
-func (ze zipEntry) CRC32() uint32 {
-	return ze.content.FileHeader.CRC32
-}
-
-func (ze zipEntry) Size() uint64 {
-	return ze.content.FileHeader.UncompressedSize64
-}
-
-func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
-	return zw.CopyFrom(ze.content, dest)
-}
-
-// a bufferEntry is a zipSource that pulls its content from a []byte
-type bufferEntry struct {
-	fh      *zip.FileHeader
-	content []byte
-}
-
-func (be bufferEntry) String() string {
-	return "internal buffer"
-}
-
-func (be bufferEntry) IsDir() bool {
-	return be.fh.FileInfo().IsDir()
-}
-
-func (be bufferEntry) CRC32() uint32 {
-	return crc32.ChecksumIEEE(be.content)
-}
-
-func (be bufferEntry) Size() uint64 {
-	return uint64(len(be.content))
-}
-
-func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
-	w, err := zw.CreateHeader(be.fh)
-	if err != nil {
-		return err
-	}
-
-	if !be.IsDir() {
-		_, err = w.Write(be.content)
-		if err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-type zipSource interface {
-	String() string
-	IsDir() bool
-	CRC32() uint32
-	Size() uint64
-	WriteToZip(dest string, zw *zip.Writer) error
-}
-
-// a fileMapping specifies to copy a zip entry from one place to another
-type fileMapping struct {
-	dest   string
-	source zipSource
-}
-
-func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string,
-	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
-	stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
-
-	sourceByDest := make(map[string]zipSource, 0)
-	orderedMappings := []fileMapping{}
-
-	// if dest already exists returns a non-null zipSource for the existing source
-	addMapping := func(dest string, source zipSource) zipSource {
-		mapKey := filepath.Clean(dest)
-		if existingSource, exists := sourceByDest[mapKey]; exists {
-			return existingSource
-		}
-
-		sourceByDest[mapKey] = source
-		orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
-		return nil
-	}
-
-	if manifest != "" {
-		if !stripDirEntries {
-			dirHeader := jar.MetaDirFileHeader()
-			dirSource := bufferEntry{dirHeader, nil}
-			addMapping(jar.MetaDir, dirSource)
-		}
-
-		contents, err := ioutil.ReadFile(manifest)
-		if err != nil {
-			return err
-		}
-
-		fh, buf, err := jar.ManifestFileContents(contents)
-		if err != nil {
-			return err
-		}
-
-		fileSource := bufferEntry{fh, buf}
-		addMapping(jar.ManifestFile, fileSource)
-	}
-
-	if pyMain != "" {
-		buf, err := ioutil.ReadFile(pyMain)
-		if err != nil {
-			return err
-		}
-		fh := &zip.FileHeader{
-			Name:               "__main__.py",
-			Method:             zip.Store,
-			UncompressedSize64: uint64(len(buf)),
-		}
-		fh.SetMode(0700)
-		fh.SetModTime(jar.DefaultTime)
-		fileSource := bufferEntry{fh, buf}
-		addMapping("__main__.py", fileSource)
-	}
-
-	if emulatePar {
-		// the runfiles packages needs to be populated with "__init__.py".
-		newPyPkgs := []string{}
-		// the runfiles dirs have been treated as packages.
-		existingPyPkgSet := make(map[string]bool)
-		// put existing __init__.py files to a set first. This set is used for preventing
-		// generated __init__.py files from overwriting existing ones.
-		for _, namedReader := range readers {
-			for _, file := range namedReader.reader.File {
-				if filepath.Base(file.Name) != "__init__.py" {
-					continue
-				}
-				pyPkg := pathBeforeLastSlash(file.Name)
-				if _, found := existingPyPkgSet[pyPkg]; found {
-					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
-				} else {
-					existingPyPkgSet[pyPkg] = true
-				}
-			}
-		}
-		for _, namedReader := range readers {
-			for _, file := range namedReader.reader.File {
-				var parentPath string /* the path after trimming last "/" */
-				if filepath.Base(file.Name) == "__init__.py" {
-					// for existing __init__.py files, we should trim last "/" for twice.
-					// eg. a/b/c/__init__.py ---> a/b
-					parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
-				} else {
-					parentPath = pathBeforeLastSlash(file.Name)
-				}
-				populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
-			}
-		}
-		for _, pkg := range newPyPkgs {
-			var emptyBuf []byte
-			fh := &zip.FileHeader{
-				Name:               filepath.Join(pkg, "__init__.py"),
-				Method:             zip.Store,
-				UncompressedSize64: uint64(len(emptyBuf)),
-			}
-			fh.SetMode(0700)
-			fh.SetModTime(jar.DefaultTime)
-			fileSource := bufferEntry{fh, emptyBuf}
-			addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
-		}
-	}
-	for _, namedReader := range readers {
-		_, skipStripThisZip := zipsToNotStrip[namedReader.path]
-		for _, file := range namedReader.reader.File {
-			if !skipStripThisZip {
-				if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
-					return err
-				} else if skip {
-					continue
-				}
-			}
-
-			if stripDirEntries && file.FileInfo().IsDir() {
-				continue
-			}
-
-			// check for other files or directories destined for the same path
-			dest := file.Name
-
-			// make a new entry to add
-			source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
-
-			if existingSource := addMapping(dest, source); existingSource != nil {
-				// handle duplicates
-				if existingSource.IsDir() != source.IsDir() {
-					return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
-						dest, existingSource, source)
-				}
-
-				if ignoreDuplicates {
-					continue
-				}
-
-				if emulateJar &&
-					file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
-					// Skip manifest and module info files that are not from the first input file
-					continue
-				}
-
-				if source.IsDir() {
-					continue
-				}
-
-				if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
-					continue
-				}
-
-				return fmt.Errorf("Duplicate path %v found in %v and %v\n",
-					dest, existingSource, source)
-			}
-		}
-	}
-
-	if emulateJar {
-		jarSort(orderedMappings)
-	} else if sortEntries {
-		alphanumericSort(orderedMappings)
-	}
-
-	for _, entry := range orderedMappings {
-		if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-// Sets the given directory and all its ancestor directories as Python packages.
-func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
-	for pkgPath != "" {
-		if _, found := existingPyPkgSet[pkgPath]; !found {
-			existingPyPkgSet[pkgPath] = true
-			*newPyPkgs = append(*newPyPkgs, pkgPath)
-			// Gets its ancestor directory by trimming last slash.
-			pkgPath = pathBeforeLastSlash(pkgPath)
-		} else {
-			break
-		}
-	}
-}
-
-func pathBeforeLastSlash(path string) string {
-	ret := filepath.Dir(path)
-	// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
-	if ret == "." || ret == "/" {
-		return ""
-	}
-	return ret
-}
-
-func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
-	for _, dir := range stripDirs {
-		dir = filepath.Clean(dir)
-		patterns := []string{
-			dir + "/",      // the directory itself
-			dir + "/**/*",  // files recursively in the directory
-			dir + "/**/*/", // directories recursively in the directory
-		}
-
-		for _, pattern := range patterns {
-			match, err := pathtools.Match(pattern, name)
-			if err != nil {
-				return false, fmt.Errorf("%s: %s", err.Error(), pattern)
-			} else if match {
-				if emulateJar {
-					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
-					// requested.
-					// TODO(ccross): which files does this affect?
-					if name != jar.MetaDir && name != jar.ManifestFile {
-						return true, nil
-					}
-				}
-				return true, nil
-			}
-		}
-	}
-
-	for _, pattern := range stripFiles {
-		if match, err := pathtools.Match(pattern, name); err != nil {
-			return false, fmt.Errorf("%s: %s", err.Error(), pattern)
-		} else if match {
-			return true, nil
-		}
-	}
-	return false, nil
-}
-
-func jarSort(files []fileMapping) {
-	sort.SliceStable(files, func(i, j int) bool {
-		return jar.EntryNamesLess(files[i].dest, files[j].dest)
-	})
-}
-
-func alphanumericSort(files []fileMapping) {
-	sort.SliceStable(files, func(i, j int) bool {
-		return files[i].dest < files[j].dest
-	})
-}
diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go
index dbde270..cb58436 100644
--- a/cmd/merge_zips/merge_zips_test.go
+++ b/cmd/merge_zips/merge_zips_test.go
@@ -51,6 +51,39 @@
 	moduleInfoFile = testZipEntry{jar.ModuleInfoClass, 0755, []byte("module-info")}
 )
 
+type testInputZip struct {
+	name    string
+	entries []testZipEntry
+	reader  *zip.Reader
+}
+
+func (tiz *testInputZip) Name() string {
+	return tiz.name
+}
+
+func (tiz *testInputZip) Open() error {
+	if tiz.reader == nil {
+		tiz.reader = testZipEntriesToZipReader(tiz.entries)
+	}
+	return nil
+}
+
+func (tiz *testInputZip) Close() error {
+	tiz.reader = nil
+	return nil
+}
+
+func (tiz *testInputZip) Entries() []*zip.File {
+	if tiz.reader == nil {
+		panic(fmt.Errorf("%s: should be open to get entries", tiz.Name()))
+	}
+	return tiz.reader.File
+}
+
+func (tiz *testInputZip) IsOpen() bool {
+	return tiz.reader != nil
+}
+
 func TestMergeZips(t *testing.T) {
 	testCases := []struct {
 		name             string
@@ -207,13 +240,9 @@
 
 	for _, test := range testCases {
 		t.Run(test.name, func(t *testing.T) {
-			var readers []namedZipReader
+			inputZips := make([]InputZip, len(test.in))
 			for i, in := range test.in {
-				r := testZipEntriesToZipReader(in)
-				readers = append(readers, namedZipReader{
-					path:   "in" + strconv.Itoa(i),
-					reader: r,
-				})
+				inputZips[i] = &testInputZip{name: "in" + strconv.Itoa(i), entries: in}
 			}
 
 			want := testZipEntriesToBuf(test.out)
@@ -221,7 +250,7 @@
 			out := &bytes.Buffer{}
 			writer := zip.NewWriter(out)
 
-			err := mergeZips(readers, writer, "", "",
+			err := mergeZips(inputZips, writer, "", "",
 				test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates,
 				test.stripFiles, test.stripDirs, test.zipsToNotStrip)
 
@@ -304,3 +333,60 @@
 
 	return ret
 }
+
+type DummyInpuZip struct {
+	isOpen bool
+}
+
+func (diz *DummyInpuZip) Name() string {
+	return "dummy"
+}
+
+func (diz *DummyInpuZip) Open() error {
+	diz.isOpen = true
+	return nil
+}
+
+func (diz *DummyInpuZip) Close() error {
+	diz.isOpen = false
+	return nil
+}
+
+func (DummyInpuZip) Entries() []*zip.File {
+	panic("implement me")
+}
+
+func (diz *DummyInpuZip) IsOpen() bool {
+	return diz.isOpen
+}
+
+func TestInputZipsManager(t *testing.T) {
+	const nInputZips = 20
+	const nMaxOpenZips = 10
+	izm := NewInputZipsManager(20, 10)
+	managedZips := make([]InputZip, nInputZips)
+	for i := 0; i < nInputZips; i++ {
+		managedZips[i] = izm.Manage(&DummyInpuZip{})
+	}
+
+	t.Run("InputZipsManager", func(t *testing.T) {
+		for i, iz := range managedZips {
+			if err := iz.Open(); err != nil {
+				t.Fatalf("Step %d: open failed: %s", i, err)
+				return
+			}
+			if izm.nOpenZips > nMaxOpenZips {
+				t.Errorf("Step %d: should be <=%d open zips", i, nMaxOpenZips)
+			}
+		}
+		if !managedZips[nInputZips-1].IsOpen() {
+			t.Error("The last input should stay open")
+		}
+		for _, iz := range managedZips {
+			iz.Close()
+		}
+		if izm.nOpenZips > 0 {
+			t.Error("Some input zips are still open")
+		}
+	})
+}