Merge "Allow reading zipfile list from a file."
diff --git a/cmd/merge_zips/Android.bp b/cmd/merge_zips/Android.bp
index ab658fd..f70c86e 100644
--- a/cmd/merge_zips/Android.bp
+++ b/cmd/merge_zips/Android.bp
@@ -18,6 +18,7 @@
"android-archive-zip",
"blueprint-pathtools",
"soong-jar",
+ "soong-zip",
],
srcs: [
"merge_zips.go",
diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go
index 68fe259..27179cb 100644
--- a/cmd/merge_zips/merge_zips.go
+++ b/cmd/merge_zips/merge_zips.go
@@ -30,8 +30,566 @@
"android/soong/jar"
"android/soong/third_party/zip"
+ soongZip "android/soong/zip"
)
+// Input zip: we can open it, close it, and obtain an array of entries
+type InputZip interface {
+ // Name identifies the zip; it appears in messages and is the key that
+ // mergeZips looks up in zipsToNotStrip.
+ Name() string
+ // Open is called again before each use by the code in this file, so
+ // implementations are expected to tolerate being opened repeatedly.
+ Open() error
+ Close() error
+ // Entries returns the entries of the zip. The implementations in this file
+ // panic when the zip is not open.
+ Entries() []*zip.File
+ IsOpen() bool
+}
+
+// An entry that can be written to the output zip
+type ZipEntryContents interface {
+ // String describes the origin of the entry; it is what %v prints in the
+ // error messages built by copyEntry.
+ String() string
+ IsDir() bool
+ // CRC32 and Size are compared by copyEntry to decide whether two entries
+ // with the same name are identical.
+ CRC32() uint32
+ Size() uint64
+ // WriteToZip writes the entry to zw; dest is the destination entry name.
+ WriteToZip(dest string, zw *zip.Writer) error
+}
+
+// ZipEntryFromZip is a ZipEntryContents backed by an entry of an input zip.
+// It remembers the input zip and the entry's index in that zip's entries
+// array, plus the name, directory flag, CRC32 and uncompressed size copied
+// from the entry when it was created.
+type ZipEntryFromZip struct {
+	inputZip InputZip
+	index    int
+	name     string
+	isDir    bool
+	crc32    uint32
+	size     uint64
+}
+
+// NewZipEntryFromZip captures entry number entryIndex of inputZip. It reads
+// inputZip.Entries(), which the implementations in this file only allow while
+// the zip is open.
+func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip {
+	file := inputZip.Entries()[entryIndex]
+	return &ZipEntryFromZip{
+		inputZip: inputZip,
+		index:    entryIndex,
+		name:     file.Name,
+		isDir:    file.FileInfo().IsDir(),
+		crc32:    file.CRC32,
+		size:     file.UncompressedSize64,
+	}
+}
+
+// String renders the entry as "<zip name>!<entry name>".
+func (ze ZipEntryFromZip) String() string {
+	return ze.inputZip.Name() + "!" + ze.name
+}
+
+// IsDir returns the directory flag recorded at creation.
+func (ze ZipEntryFromZip) IsDir() bool {
+	return ze.isDir
+}
+
+// CRC32 returns the checksum recorded at creation.
+func (ze ZipEntryFromZip) CRC32() uint32 {
+	return ze.crc32
+}
+
+// Size returns the uncompressed size recorded at creation.
+func (ze ZipEntryFromZip) Size() uint64 {
+	return ze.size
+}
+
+// WriteToZip opens the source zip (it may have been closed since the entry was
+// created) and copies the entry into zw under the name dest.
+func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error {
+	err := ze.inputZip.Open()
+	if err != nil {
+		return err
+	}
+	source := ze.inputZip.Entries()[ze.index]
+	return zw.CopyFrom(source, dest)
+}
+
+// ZipEntryFromBuffer is a ZipEntryContents held in memory: a ready-made file
+// header plus the bytes to store under it.
+type ZipEntryFromBuffer struct {
+	fh      *zip.FileHeader
+	content []byte
+}
+
+// String identifies the entry in messages; every buffer entry reports the same
+// fixed text.
+func (be ZipEntryFromBuffer) String() string {
+	return "internal buffer"
+}
+
+// IsDir reports whether the stored header describes a directory.
+func (be ZipEntryFromBuffer) IsDir() bool {
+	info := be.fh.FileInfo()
+	return info.IsDir()
+}
+
+// CRC32 computes the IEEE CRC-32 of the buffered content.
+func (be ZipEntryFromBuffer) CRC32() uint32 {
+	return crc32.ChecksumIEEE(be.content)
+}
+
+// Size is the number of buffered bytes.
+func (be ZipEntryFromBuffer) Size() uint64 {
+	n := len(be.content)
+	return uint64(n)
+}
+
+// WriteToZip creates an entry in zw from the stored header and, unless the
+// entry is a directory, writes the buffered content into it. The dest argument
+// is not consulted; the entry name comes from the header.
+func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error {
+	w, err := zw.CreateHeader(be.fh)
+	if err != nil || be.IsDir() {
+		// Either creation failed, or there is no content to write for a directory.
+		return err
+	}
+	_, err = w.Write(be.content)
+	return err
+}
+
+// OutputZip is the processing state of one merge: the writer of the output
+// zip, the options controlling the merge, and the entries registered so far.
+type OutputZip struct {
+	outputWriter     *zip.Writer
+	stripDirEntries  bool
+	emulateJar       bool
+	sortEntries      bool
+	ignoreDuplicates bool
+	excludeDirs      []string
+	excludeFiles     []string
+	// sourceByDest maps an output entry name to the contents registered for it.
+	sourceByDest map[string]ZipEntryContents
+}
+
+// NewOutputZip creates the state for writing to outputWriter with the given
+// options. The exclusion lists start empty; see setExcludeDirs and
+// setExcludeFiles.
+func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip {
+	oz := new(OutputZip)
+	oz.outputWriter = outputWriter
+	oz.sortEntries = sortEntries
+	oz.emulateJar = emulateJar
+	oz.stripDirEntries = stripDirEntries
+	oz.ignoreDuplicates = ignoreDuplicates
+	oz.sourceByDest = make(map[string]ZipEntryContents)
+	return oz
+}
+
+// setExcludeDirs stores a cleaned (filepath.Clean) copy of the directories to
+// be excluded from the output.
+func (oz *OutputZip) setExcludeDirs(excludeDirs []string) {
+	cleaned := make([]string, 0, len(excludeDirs))
+	for _, dir := range excludeDirs {
+		cleaned = append(cleaned, filepath.Clean(dir))
+	}
+	oz.excludeDirs = cleaned
+}
+
+// Sets the file patterns to be excluded from the output. The slice is stored
+// as is, without copying.
+func (oz *OutputZip) setExcludeFiles(excludeFiles []string) {
+ oz.excludeFiles = excludeFiles
+}
+
+// addZipEntry registers source as the contents of the output entry called
+// name. If the name is already taken, nothing changes and the previously
+// registered ZipEntryContents is returned. A new entry is written to the
+// output right away, unless entries are going to be rearranged (emulateJar or
+// sortEntries), in which case writing is delayed.
+func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) {
+	if previous, taken := oz.sourceByDest[name]; taken {
+		return previous, nil
+	}
+	oz.sourceByDest[name] = source
+	if !oz.emulateJar && !oz.sortEntries {
+		return nil, source.WriteToZip(name, oz.outputWriter)
+	}
+	return nil, nil
+}
+
+// Adds an entry for the manifest (META-INF/MANIFEST.MF) built from the given
+// file. Unless directory entries are stripped, an entry for the META-INF
+// directory is added first. Returns the first error from reading the file,
+// building the manifest contents, or adding an entry.
+func (oz *OutputZip) addManifest(manifestPath string) error {
+	if !oz.stripDirEntries {
+		if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil {
+			return err
+		}
+	}
+	contents, err := ioutil.ReadFile(manifestPath)
+	if err != nil {
+		return err
+	}
+	// Declared in the same scope as the err above on purpose: a := inside a
+	// nested block would shadow err and the failure would never be returned.
+	fh, buf, err := jar.ManifestFileContents(contents)
+	if err != nil {
+		return err
+	}
+	_, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf})
+	return err
+}
+
+// addZipEntryFromFile adds an entry called name whose contents are read from
+// the file at path. The entry is stored uncompressed, with mode 0700 and
+// jar.DefaultTime as its modification time.
+func (oz *OutputZip) addZipEntryFromFile(name string, path string) error {
+	data, err := ioutil.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	header := &zip.FileHeader{
+		Name:               name,
+		Method:             zip.Store,
+		UncompressedSize64: uint64(len(data)),
+	}
+	header.SetMode(0700)
+	header.SetModTime(jar.DefaultTime)
+	_, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh: header, content: data})
+	return err
+}
+
+// addEmptyEntry adds a zero-length entry with the given name, stored
+// uncompressed with mode 0700 and jar.DefaultTime as its modification time.
+func (oz *OutputZip) addEmptyEntry(entry string) error {
+	header := &zip.FileHeader{
+		Name:               entry,
+		Method:             zip.Store,
+		UncompressedSize64: 0,
+	}
+	header.SetMode(0700)
+	header.SetModTime(jar.DefaultTime)
+	// The content is left nil: there is nothing to write for an empty entry.
+	_, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh: header})
+	return err
+}
+
+// Returns true if given entry is to be excluded, i.e. its name matches one of
+// the excludeDirs directories (the directory entry itself or anything below
+// it) or one of the excludeFiles patterns. Panics if pathtools.Match reports
+// an error for a pattern.
+func (oz *OutputZip) isEntryExcluded(name string) bool {
+ for _, dir := range oz.excludeDirs {
+ dir = filepath.Clean(dir)
+ patterns := []string{
+ dir + "/", // the directory itself
+ dir + "/**/*", // files recursively in the directory
+ dir + "/**/*/", // directories recursively in the directory
+ }
+
+ for _, pattern := range patterns {
+ match, err := pathtools.Match(pattern, name)
+ if err != nil {
+ panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+ }
+ if match {
+ // NOTE(review): both paths below return true, so the emulateJar check
+ // currently has no effect and META-INF/MANIFEST.MF is excluded like any
+ // other match, contrary to the comment that follows. Confirm the
+ // intended behavior before changing it.
+ if oz.emulateJar {
+ // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
+ // requested.
+ // TODO(ccross): which files does this affect?
+ if name != jar.MetaDir && name != jar.ManifestFile {
+ return true
+ }
+ }
+ return true
+ }
+ }
+ }
+
+ // File patterns are matched against the full entry name.
+ for _, pattern := range oz.excludeFiles {
+ match, err := pathtools.Match(pattern, name)
+ if err != nil {
+ panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+ }
+ if match {
+ return true
+ }
+ }
+ return false
+}
+
+// copyEntry adds entry number index of inputZip to the output under its own
+// name. Directory entries are dropped when stripDirEntries is set. When the
+// name is already taken, the new entry is silently skipped if it is an
+// acceptable duplicate (see the switch below); a directory/file mismatch or
+// any other duplicate is an error.
+func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error {
+	entry := NewZipEntryFromZip(inputZip, index)
+	if oz.stripDirEntries && entry.IsDir() {
+		return nil
+	}
+	existing, err := oz.addZipEntry(entry.name, entry)
+	if err != nil || existing == nil {
+		// Either adding failed, or the name was free and there is no conflict.
+		return err
+	}
+
+	// File types should match
+	if existing.IsDir() != entry.IsDir() {
+		return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
+			entry.name, existing, entry)
+	}
+
+	switch {
+	case oz.ignoreDuplicates:
+	case oz.emulateJar && entry.name == jar.ManifestFile:
+		// Manifest that is not from the first input file.
+	case entry.name == jar.ModuleInfoClass:
+		// Module info that is not from the first input file.
+		// NOTE(review): unlike the manifest case this does not depend on
+		// emulateJar; confirm that is intended.
+	case existing.CRC32() == entry.CRC32() && existing.Size() == entry.Size():
+		// Identical entries
+	case entry.IsDir():
+		// Directory entries
+	default:
+		return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existing, inputZip.Name())
+	}
+	return nil
+}
+
+// entriesArray returns the names of all registered entries. The order follows
+// map iteration and is therefore unspecified.
+func (oz *OutputZip) entriesArray() []string {
+	names := make([]string, 0, len(oz.sourceByDest))
+	for name := range oz.sourceByDest {
+		names = append(names, name)
+	}
+	return names
+}
+
+// jarSorted returns the registered entry names ordered by jar.EntryNamesLess.
+func (oz *OutputZip) jarSorted() []string {
+	names := oz.entriesArray()
+	less := func(i, j int) bool {
+		return jar.EntryNamesLess(names[i], names[j])
+	}
+	sort.SliceStable(names, less)
+	return names
+}
+
+// alphanumericSorted returns the registered entry names in increasing string
+// order.
+func (oz *OutputZip) alphanumericSorted() []string {
+	names := oz.entriesArray()
+	sort.Sort(sort.StringSlice(names))
+	return names
+}
+
+// writeEntries writes the registered contents of the named entries to the
+// output zip, in the given order. It stops at the first error. A name with no
+// registered contents is reported as an error instead of causing a nil
+// interface call.
+func (oz *OutputZip) writeEntries(entries []string) error {
+	for _, entry := range entries {
+		source, ok := oz.sourceByDest[entry]
+		if !ok {
+			return fmt.Errorf("no contents registered for entry %q", entry)
+		}
+		if err := source.WriteToZip(entry, oz.outputWriter); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// getUninitializedPythonPackages returns, in sorted order, the Python packages
+// found in the given input zips that have no __init__.py. A package is the
+// directory of any entry, and every ancestor of such a directory. Each input
+// zip is opened to read its entries; an open failure is returned as the error.
+// Panics if two entries provide __init__.py for the same package.
+//
+// The result is sorted because it is collected from a map: callers write one
+// entry per returned package, and without sorting the order of those entries
+// in an unsorted output zip would change from run to run.
+func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
+	// the runfiles packages needs to be populated with "__init__.py".
+	// the runfiles dirs have been treated as packages.
+	allPackages := make(map[string]bool)
+	initedPackages := make(map[string]bool)
+	getPackage := func(path string) string {
+		ret := filepath.Dir(path)
+		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
+		if ret == "." || ret == "/" {
+			return ""
+		}
+		return ret
+	}
+
+	// put existing __init__.py files to a set first. This set is used for preventing
+	// generated __init__.py files from overwriting existing ones.
+	for _, inputZip := range inputZips {
+		if err := inputZip.Open(); err != nil {
+			return nil, err
+		}
+		for _, file := range inputZip.Entries() {
+			pyPkg := getPackage(file.Name)
+			if filepath.Base(file.Name) == "__init__.py" {
+				if _, found := initedPackages[pyPkg]; found {
+					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name))
+				}
+				initedPackages[pyPkg] = true
+			}
+			// Record the package and its ancestors; stop at the first one
+			// already seen, since its ancestors were recorded with it.
+			for pyPkg != "" {
+				if _, found := allPackages[pyPkg]; found {
+					break
+				}
+				allPackages[pyPkg] = true
+				pyPkg = getPackage(pyPkg)
+			}
+		}
+	}
+	noInitPackages := make([]string, 0)
+	for pyPkg := range allPackages {
+		if _, found := initedPackages[pyPkg]; !found {
+			noInitPackages = append(noInitPackages, pyPkg)
+		}
+	}
+	sort.Strings(noInitPackages)
+	return noInitPackages, nil
+}
+
+// An InputZip owned by the InputZipsManager. Opened ManagedInputZip's are chained in the open order.
+type ManagedInputZip struct {
+ // owner is the manager whose reopen/close implement Open and Close.
+ owner *InputZipsManager
+ // realInputZip is the wrapped InputZip that does the actual work.
+ realInputZip InputZip
+ // older and newer are the neighbours in the owner's list of open zips;
+ // unlink resets both to nil when the element leaves the list.
+ older *ManagedInputZip
+ newer *ManagedInputZip
+}
+
+// Maintains the array of ManagedInputZips, keeping track of open input ones. When an InputZip is opened,
+// may close some other InputZip to limit the number of open ones.
+type InputZipsManager struct {
+ // inputZips holds every zip handed out by Manage.
+ inputZips []*ManagedInputZip
+ // nOpenZips counts the zips opened through reopen and not yet closed.
+ nOpenZips int
+ // maxOpenZips is the limit that makes reopen close another zip first.
+ maxOpenZips int
+ // openInputZips is the dummy head element of the circular list of open zips.
+ openInputZips *ManagedInputZip
+}
+
+// Removes miz from the list of open zips by joining its two neighbours, and
+// resets its own links to nil. Panics if the neighbours do not point back at
+// miz.
+func (miz *ManagedInputZip) unlink() {
+ olderMiz := miz.older
+ newerMiz := miz.newer
+ if newerMiz.older != miz || olderMiz.newer != miz {
+ panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v",
+ miz, miz, newerMiz, newerMiz, olderMiz, olderMiz))
+ }
+ olderMiz.newer = newerMiz
+ newerMiz.older = olderMiz
+ // Nil links mark the element as not being in the list (link checks this).
+ miz.newer = nil
+ miz.older = nil
+}
+
+// link inserts olderMiz into the list right after miz in the "older"
+// direction, i.e. between miz and the element that used to be miz.older.
+// Panics if olderMiz is already in a list (its links are not nil) or if the
+// list around miz is inconsistent.
+func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) {
+	if olderMiz.newer != nil || olderMiz.older != nil {
+		panic(fmt.Errorf("inputZip is already open"))
+	}
+	oldOlderMiz := miz.older
+	if oldOlderMiz.newer != miz {
+		// Each element is passed twice: once for %p and once for %#v.
+		panic(fmt.Errorf("broken list between %p:%#v and %p:%#v",
+			miz, miz, oldOlderMiz, oldOlderMiz))
+	}
+	miz.older = olderMiz
+	olderMiz.older = oldOlderMiz
+	oldOlderMiz.newer = olderMiz
+	olderMiz.newer = miz
+}
+
+// NewInputZipsManager creates a manager that keeps at most maxOpenZips input
+// zips open at a time. nInputZips is only a capacity hint for the array of
+// managed zips. Panics if maxOpenZips is less than 3.
+func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager {
+	if maxOpenZips < 3 {
+		panic(fmt.Errorf("open zips limit should be at least 3"))
+	}
+	// In the dummy element .older points to the most recently opened InputZip, and .newer points to the oldest.
+	// An empty list is the dummy element linked to itself.
+	head := new(ManagedInputZip)
+	head.older = head
+	head.newer = head
+	return &InputZipsManager{
+		inputZips:     make([]*ManagedInputZip, 0, nInputZips),
+		maxOpenZips:   maxOpenZips,
+		openInputZips: head,
+	}
+}
+
+// Manage wraps inz into a ManagedInputZip owned by this manager, records it,
+// and returns the wrapper. The wrapped zip is not opened here.
+func (izm *InputZipsManager) Manage(inz InputZip) InputZip {
+	managed := &ManagedInputZip{
+		owner:        izm,
+		realInputZip: inz,
+	}
+	izm.inputZips = append(izm.inputZips, managed)
+	return managed
+}
+
+// Opens or reopens ManagedInputZip. If the zip is already open, it is moved to
+// the most-recently-opened position of the open list. Otherwise, if the number
+// of open zips has reached maxOpenZips, another open zip is closed first; then
+// the zip is opened, linked into the list and counted.
+func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error {
+ if miz.realInputZip.IsOpen() {
+ // NOTE(review): openInputZips is the dummy list head, so this comparison
+ // looks like it is always true; presumably openInputZips.older (the most
+ // recently opened zip) was meant. Relinking gives the same list either way.
+ if miz != izm.openInputZips {
+ miz.unlink()
+ izm.openInputZips.link(miz)
+ }
+ return nil
+ }
+ if izm.nOpenZips >= izm.maxOpenZips {
+ // NOTE(review): openInputZips.older is the most recently linked zip (see
+ // link), so this closes the newest open zip, not the oldest one
+ // (openInputZips.newer). Confirm which eviction policy is intended.
+ if err := izm.close(izm.openInputZips.older); err != nil {
+ return err
+ }
+ }
+ if err := miz.realInputZip.Open(); err != nil {
+ return err
+ }
+ izm.openInputZips.link(miz)
+ izm.nOpenZips++
+ return nil
+}
+
+// close closes miz if it is open: the wrapped zip is closed, the open count is
+// decremented and miz is removed from the open list, even when the wrapped
+// Close reports an error (which is then returned). Closing a zip that is not
+// open does nothing.
+func (izm *InputZipsManager) close(miz *ManagedInputZip) error {
+	if !miz.IsOpen() {
+		return nil
+	}
+	closeErr := miz.realInputZip.Close()
+	izm.nOpenZips--
+	miz.unlink()
+	return closeErr
+}
+
+// checkOpenZipsDeque checks that the openInputZips deque is valid and panics
+// otherwise: neighbouring elements must point at each other, every element
+// other than the dummy head must be open, and the number of elements must be
+// exactly nOpenZips.
+func (izm *InputZipsManager) checkOpenZipsDeque() {
+	nReallyOpen := 0
+	el := izm.openInputZips
+	for {
+		elNext := el.older
+		if elNext.newer != el {
+			panic(fmt.Errorf("Element:\n  %p: %v\nNext:\n  %p %v", el, el, elNext, elNext))
+		}
+		if elNext == izm.openInputZips {
+			break
+		}
+		el = elNext
+		if !el.IsOpen() {
+			panic(fmt.Errorf("Found unopened element"))
+		}
+		nReallyOpen++
+		// Checked inside the loop as well, so that a list that does not lead
+		// back to the head cannot keep the loop running forever.
+		if nReallyOpen > izm.nOpenZips {
+			panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+		}
+	}
+	// Too many elements was caught above; this catches too few.
+	if nReallyOpen != izm.nOpenZips {
+		panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+	}
+}
+
+// Name returns the name of the wrapped zip.
+func (miz *ManagedInputZip) Name() string {
+ return miz.realInputZip.Name()
+}
+
+// Open goes through the owner, which may close another zip to stay within its
+// limit of open zips.
+func (miz *ManagedInputZip) Open() error {
+ return miz.owner.reopen(miz)
+}
+
+// Close goes through the owner so that its bookkeeping stays up to date.
+func (miz *ManagedInputZip) Close() error {
+ return miz.owner.close(miz)
+}
+
+// IsOpen reports whether the wrapped zip is open.
+func (miz *ManagedInputZip) IsOpen() bool {
+ return miz.realInputZip.IsOpen()
+}
+
+// Entries returns the entries of the wrapped zip. Panics if the zip is not
+// open.
+func (miz *ManagedInputZip) Entries() []*zip.File {
+ if !miz.IsOpen() {
+ panic(fmt.Errorf("%s: is not open", miz.Name()))
+ }
+ return miz.realInputZip.Entries()
+}
+
+// Actual processing.
+// Merges the entries of inputZips into writer. Entries generated here come
+// first: the manifest (if manifest is not empty), __main__.py read from pyMain
+// (if not empty), and, with emulatePar, an empty __init__.py for every Python
+// package that lacks one. Then the entries of each input zip are copied,
+// skipping excluded ones unless the zip's name is a key of zipsToNotStrip.
+// With emulateJar or sortEntries nothing is written until all inputs have been
+// read, and the entries are then written in jar or alphanumeric order.
+// Returns the first error encountered.
+func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string,
+ sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
+ excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error {
+
+ out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates)
+ out.setExcludeFiles(excludeFiles)
+ out.setExcludeDirs(excludeDirs)
+ if manifest != "" {
+ if err := out.addManifest(manifest); err != nil {
+ return err
+ }
+ }
+ if pyMain != "" {
+ if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil {
+ return err
+ }
+ }
+
+ if emulatePar {
+ noInitPackages, err := out.getUninitializedPythonPackages(inputZips)
+ if err != nil {
+ return err
+ }
+ for _, uninitializedPyPackage := range noInitPackages {
+ if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Finally, add entries from all the input zips.
+ for _, inputZip := range inputZips {
+ // Only the presence of the key matters, not the value stored under it.
+ _, copyFully := zipsToNotStrip[inputZip.Name()]
+ if err := inputZip.Open(); err != nil {
+ return err
+ }
+
+ for i, entry := range inputZip.Entries() {
+ if copyFully || !out.isEntryExcluded(entry.Name) {
+ if err := out.copyEntry(inputZip, i); err != nil {
+ return err
+ }
+ }
+ }
+ // Unless we need to rearrange the entries, the input zip can now be closed.
+ if !(emulateJar || sortEntries) {
+ if err := inputZip.Close(); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Delayed entries are written now; jar ordering takes precedence over -s.
+ if emulateJar {
+ return out.writeEntries(out.jarSorted())
+ } else if sortEntries {
+ return out.writeEntries(out.alphanumericSorted())
+ }
+ return nil
+}
+
+// Process command line
type fileList []string
func (f *fileList) String() string {
@@ -50,9 +608,8 @@
return `""`
}
-func (s zipsToNotStripSet) Set(zip_path string) error {
- s[zip_path] = true
-
+func (s zipsToNotStripSet) Set(path string) error {
+ s[path] = true
return nil
}
@@ -60,8 +617,8 @@
sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
- stripDirs fileList
- stripFiles fileList
+ excludeDirs fileList
+ excludeFiles fileList
zipsToNotStrip = make(zipsToNotStripSet)
stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
manifest = flag.String("m", "", "manifest file to insert in jar")
@@ -71,14 +628,52 @@
)
func init() {
- flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
- flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
+ flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
+ flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
}
+// FileInputZip is an InputZip read from the file called name. The reader is
+// nil whenever the zip is not open.
+type FileInputZip struct {
+	name   string
+	reader *zip.ReadCloser
+}
+
+// Name returns the path of the zip file.
+func (fiz *FileInputZip) Name() string {
+	return fiz.name
+}
+
+// Close closes the reader and forgets it, so that the zip counts as closed
+// even if closing reports an error. Closing a zip that is not open does
+// nothing.
+func (fiz *FileInputZip) Close() error {
+	if !fiz.IsOpen() {
+		return nil
+	}
+	openReader := fiz.reader
+	fiz.reader = nil
+	return openReader.Close()
+}
+
+// Entries returns the files of the open zip. Panics if the zip is not open.
+func (fiz *FileInputZip) Entries() []*zip.File {
+	if fiz.IsOpen() {
+		return fiz.reader.File
+	}
+	panic(fmt.Errorf("%s: is not open", fiz.Name()))
+}
+
+// IsOpen reports whether there is a reader for the zip.
+func (fiz *FileInputZip) IsOpen() bool {
+	return fiz.reader != nil
+}
+
+// Open opens the zip file for reading; it does nothing if the zip is already
+// open.
+func (fiz *FileInputZip) Open() error {
+	if fiz.IsOpen() {
+		return nil
+	}
+	reader, err := zip.OpenReader(fiz.Name())
+	fiz.reader = reader
+	return err
+}
+
func main() {
flag.Usage = func() {
- fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]")
+ fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]")
flag.PrintDefaults()
}
@@ -90,16 +685,28 @@
os.Exit(1)
}
outputPath := args[0]
- inputs := args[1:]
+	// Expand the input list: an argument of the form @file names a response
+	// file whose contents list further inputs; anything else is an input zip.
+	inputs := make([]string, 0)
+	for _, input := range args[1:] {
+		// Check the length first, so that an empty argument is passed through
+		// (and reported when it is opened) instead of panicking on input[0].
+		if len(input) > 0 && input[0] == '@' {
+			contents, err := ioutil.ReadFile(input[1:])
+			if err != nil {
+				log.Fatal(err)
+			}
+			inputs = append(inputs, soongZip.ReadRespFile(contents)...)
+		} else {
+			inputs = append(inputs, input)
+		}
+	}
log.SetFlags(log.Lshortfile)
// make writer
- output, err := os.Create(outputPath)
+ outputZip, err := os.Create(outputPath)
if err != nil {
log.Fatal(err)
}
- defer output.Close()
+ defer outputZip.Close()
var offset int64
if *prefix != "" {
@@ -107,13 +714,13 @@
if err != nil {
log.Fatal(err)
}
- offset, err = io.Copy(output, prefixFile)
+ offset, err = io.Copy(outputZip, prefixFile)
if err != nil {
log.Fatal(err)
}
}
- writer := zip.NewWriter(output)
+ writer := zip.NewWriter(outputZip)
defer func() {
err := writer.Close()
if err != nil {
@@ -122,18 +729,6 @@
}()
writer.SetOffset(offset)
- // make readers
- readers := []namedZipReader{}
- for _, input := range inputs {
- reader, err := zip.OpenReader(input)
- if err != nil {
- log.Fatal(err)
- }
- defer reader.Close()
- namedReader := namedZipReader{path: input, reader: &reader.Reader}
- readers = append(readers, namedReader)
- }
-
if *manifest != "" && !*emulateJar {
log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
}
@@ -143,344 +738,15 @@
}
// do merge
- err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
- *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
+ inputZipsManager := NewInputZipsManager(len(inputs), 1000)
+ inputZips := make([]InputZip, len(inputs))
+ for i, input := range inputs {
+ inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input})
+ }
+ err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
+ *stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs),
+ map[string]bool(zipsToNotStrip))
if err != nil {
log.Fatal(err)
}
}
-
-// a namedZipReader reads a .zip file and can say which file it's reading
-type namedZipReader struct {
- path string
- reader *zip.Reader
-}
-
-// a zipEntryPath refers to a file contained in a zip
-type zipEntryPath struct {
- zipName string
- entryName string
-}
-
-func (p zipEntryPath) String() string {
- return p.zipName + "/" + p.entryName
-}
-
-// a zipEntry is a zipSource that pulls its content from another zip
-type zipEntry struct {
- path zipEntryPath
- content *zip.File
-}
-
-func (ze zipEntry) String() string {
- return ze.path.String()
-}
-
-func (ze zipEntry) IsDir() bool {
- return ze.content.FileInfo().IsDir()
-}
-
-func (ze zipEntry) CRC32() uint32 {
- return ze.content.FileHeader.CRC32
-}
-
-func (ze zipEntry) Size() uint64 {
- return ze.content.FileHeader.UncompressedSize64
-}
-
-func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
- return zw.CopyFrom(ze.content, dest)
-}
-
-// a bufferEntry is a zipSource that pulls its content from a []byte
-type bufferEntry struct {
- fh *zip.FileHeader
- content []byte
-}
-
-func (be bufferEntry) String() string {
- return "internal buffer"
-}
-
-func (be bufferEntry) IsDir() bool {
- return be.fh.FileInfo().IsDir()
-}
-
-func (be bufferEntry) CRC32() uint32 {
- return crc32.ChecksumIEEE(be.content)
-}
-
-func (be bufferEntry) Size() uint64 {
- return uint64(len(be.content))
-}
-
-func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
- w, err := zw.CreateHeader(be.fh)
- if err != nil {
- return err
- }
-
- if !be.IsDir() {
- _, err = w.Write(be.content)
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-type zipSource interface {
- String() string
- IsDir() bool
- CRC32() uint32
- Size() uint64
- WriteToZip(dest string, zw *zip.Writer) error
-}
-
-// a fileMapping specifies to copy a zip entry from one place to another
-type fileMapping struct {
- dest string
- source zipSource
-}
-
-func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string,
- sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
- stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
-
- sourceByDest := make(map[string]zipSource, 0)
- orderedMappings := []fileMapping{}
-
- // if dest already exists returns a non-null zipSource for the existing source
- addMapping := func(dest string, source zipSource) zipSource {
- mapKey := filepath.Clean(dest)
- if existingSource, exists := sourceByDest[mapKey]; exists {
- return existingSource
- }
-
- sourceByDest[mapKey] = source
- orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
- return nil
- }
-
- if manifest != "" {
- if !stripDirEntries {
- dirHeader := jar.MetaDirFileHeader()
- dirSource := bufferEntry{dirHeader, nil}
- addMapping(jar.MetaDir, dirSource)
- }
-
- contents, err := ioutil.ReadFile(manifest)
- if err != nil {
- return err
- }
-
- fh, buf, err := jar.ManifestFileContents(contents)
- if err != nil {
- return err
- }
-
- fileSource := bufferEntry{fh, buf}
- addMapping(jar.ManifestFile, fileSource)
- }
-
- if pyMain != "" {
- buf, err := ioutil.ReadFile(pyMain)
- if err != nil {
- return err
- }
- fh := &zip.FileHeader{
- Name: "__main__.py",
- Method: zip.Store,
- UncompressedSize64: uint64(len(buf)),
- }
- fh.SetMode(0700)
- fh.SetModTime(jar.DefaultTime)
- fileSource := bufferEntry{fh, buf}
- addMapping("__main__.py", fileSource)
- }
-
- if emulatePar {
- // the runfiles packages needs to be populated with "__init__.py".
- newPyPkgs := []string{}
- // the runfiles dirs have been treated as packages.
- existingPyPkgSet := make(map[string]bool)
- // put existing __init__.py files to a set first. This set is used for preventing
- // generated __init__.py files from overwriting existing ones.
- for _, namedReader := range readers {
- for _, file := range namedReader.reader.File {
- if filepath.Base(file.Name) != "__init__.py" {
- continue
- }
- pyPkg := pathBeforeLastSlash(file.Name)
- if _, found := existingPyPkgSet[pyPkg]; found {
- panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
- } else {
- existingPyPkgSet[pyPkg] = true
- }
- }
- }
- for _, namedReader := range readers {
- for _, file := range namedReader.reader.File {
- var parentPath string /* the path after trimming last "/" */
- if filepath.Base(file.Name) == "__init__.py" {
- // for existing __init__.py files, we should trim last "/" for twice.
- // eg. a/b/c/__init__.py ---> a/b
- parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
- } else {
- parentPath = pathBeforeLastSlash(file.Name)
- }
- populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
- }
- }
- for _, pkg := range newPyPkgs {
- var emptyBuf []byte
- fh := &zip.FileHeader{
- Name: filepath.Join(pkg, "__init__.py"),
- Method: zip.Store,
- UncompressedSize64: uint64(len(emptyBuf)),
- }
- fh.SetMode(0700)
- fh.SetModTime(jar.DefaultTime)
- fileSource := bufferEntry{fh, emptyBuf}
- addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
- }
- }
- for _, namedReader := range readers {
- _, skipStripThisZip := zipsToNotStrip[namedReader.path]
- for _, file := range namedReader.reader.File {
- if !skipStripThisZip {
- if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
- return err
- } else if skip {
- continue
- }
- }
-
- if stripDirEntries && file.FileInfo().IsDir() {
- continue
- }
-
- // check for other files or directories destined for the same path
- dest := file.Name
-
- // make a new entry to add
- source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
-
- if existingSource := addMapping(dest, source); existingSource != nil {
- // handle duplicates
- if existingSource.IsDir() != source.IsDir() {
- return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
- dest, existingSource, source)
- }
-
- if ignoreDuplicates {
- continue
- }
-
- if emulateJar &&
- file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
- // Skip manifest and module info files that are not from the first input file
- continue
- }
-
- if source.IsDir() {
- continue
- }
-
- if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
- continue
- }
-
- return fmt.Errorf("Duplicate path %v found in %v and %v\n",
- dest, existingSource, source)
- }
- }
- }
-
- if emulateJar {
- jarSort(orderedMappings)
- } else if sortEntries {
- alphanumericSort(orderedMappings)
- }
-
- for _, entry := range orderedMappings {
- if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// Sets the given directory and all its ancestor directories as Python packages.
-func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
- for pkgPath != "" {
- if _, found := existingPyPkgSet[pkgPath]; !found {
- existingPyPkgSet[pkgPath] = true
- *newPyPkgs = append(*newPyPkgs, pkgPath)
- // Gets its ancestor directory by trimming last slash.
- pkgPath = pathBeforeLastSlash(pkgPath)
- } else {
- break
- }
- }
-}
-
-func pathBeforeLastSlash(path string) string {
- ret := filepath.Dir(path)
- // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
- if ret == "." || ret == "/" {
- return ""
- }
- return ret
-}
-
-func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
- for _, dir := range stripDirs {
- dir = filepath.Clean(dir)
- patterns := []string{
- dir + "/", // the directory itself
- dir + "/**/*", // files recursively in the directory
- dir + "/**/*/", // directories recursively in the directory
- }
-
- for _, pattern := range patterns {
- match, err := pathtools.Match(pattern, name)
- if err != nil {
- return false, fmt.Errorf("%s: %s", err.Error(), pattern)
- } else if match {
- if emulateJar {
- // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
- // requested.
- // TODO(ccross): which files does this affect?
- if name != jar.MetaDir && name != jar.ManifestFile {
- return true, nil
- }
- }
- return true, nil
- }
- }
- }
-
- for _, pattern := range stripFiles {
- if match, err := pathtools.Match(pattern, name); err != nil {
- return false, fmt.Errorf("%s: %s", err.Error(), pattern)
- } else if match {
- return true, nil
- }
- }
- return false, nil
-}
-
-func jarSort(files []fileMapping) {
- sort.SliceStable(files, func(i, j int) bool {
- return jar.EntryNamesLess(files[i].dest, files[j].dest)
- })
-}
-
-func alphanumericSort(files []fileMapping) {
- sort.SliceStable(files, func(i, j int) bool {
- return files[i].dest < files[j].dest
- })
-}
diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go
index dbde270..cb58436 100644
--- a/cmd/merge_zips/merge_zips_test.go
+++ b/cmd/merge_zips/merge_zips_test.go
@@ -51,6 +51,39 @@
moduleInfoFile = testZipEntry{jar.ModuleInfoClass, 0755, []byte("module-info")}
)
+// testInputZip is an InputZip for tests, built from a list of test entries.
+// The reader is created on Open and dropped on Close.
+type testInputZip struct {
+ name string
+ entries []testZipEntry
+ reader *zip.Reader
+}
+
+func (tiz *testInputZip) Name() string {
+ return tiz.name
+}
+
+// Open builds the reader from the entries unless one already exists.
+func (tiz *testInputZip) Open() error {
+ if tiz.reader == nil {
+ tiz.reader = testZipEntriesToZipReader(tiz.entries)
+ }
+ return nil
+}
+
+func (tiz *testInputZip) Close() error {
+ tiz.reader = nil
+ return nil
+}
+
+// Entries panics if the zip is not open.
+func (tiz *testInputZip) Entries() []*zip.File {
+ if tiz.reader == nil {
+ panic(fmt.Errorf("%s: should be open to get entries", tiz.Name()))
+ }
+ return tiz.reader.File
+}
+
+func (tiz *testInputZip) IsOpen() bool {
+ return tiz.reader != nil
+}
+
func TestMergeZips(t *testing.T) {
testCases := []struct {
name string
@@ -207,13 +240,9 @@
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
- var readers []namedZipReader
+ inputZips := make([]InputZip, len(test.in))
for i, in := range test.in {
- r := testZipEntriesToZipReader(in)
- readers = append(readers, namedZipReader{
- path: "in" + strconv.Itoa(i),
- reader: r,
- })
+ inputZips[i] = &testInputZip{name: "in" + strconv.Itoa(i), entries: in}
}
want := testZipEntriesToBuf(test.out)
@@ -221,7 +250,7 @@
out := &bytes.Buffer{}
writer := zip.NewWriter(out)
- err := mergeZips(readers, writer, "", "",
+ err := mergeZips(inputZips, writer, "", "",
test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates,
test.stripFiles, test.stripDirs, test.zipsToNotStrip)
@@ -304,3 +333,60 @@
return ret
}
+
+// DummyInputZip is an InputZip stub that only tracks whether it is open.
+type DummyInputZip struct {
+	isOpen bool
+}
+
+func (diz *DummyInputZip) Name() string {
+	return "dummy"
+}
+
+func (diz *DummyInputZip) Open() error {
+	diz.isOpen = true
+	return nil
+}
+
+func (diz *DummyInputZip) Close() error {
+	diz.isOpen = false
+	return nil
+}
+
+// Entries is not needed by the manager test.
+func (diz *DummyInputZip) Entries() []*zip.File {
+	panic("implement me")
+}
+
+func (diz *DummyInputZip) IsOpen() bool {
+	return diz.isOpen
+}
+
+// TestInputZipsManager opens more zips than the manager allows to be open at
+// once and checks that the limit is respected, that the zip opened last stays
+// open, and that closing everything leaves no zip counted as open.
+func TestInputZipsManager(t *testing.T) {
+	const nInputZips = 20
+	const nMaxOpenZips = 10
+	izm := NewInputZipsManager(nInputZips, nMaxOpenZips)
+	managedZips := make([]InputZip, nInputZips)
+	for i := 0; i < nInputZips; i++ {
+		managedZips[i] = izm.Manage(&DummyInputZip{})
+	}
+
+	t.Run("InputZipsManager", func(t *testing.T) {
+		for i, iz := range managedZips {
+			if err := iz.Open(); err != nil {
+				t.Fatalf("Step %d: open failed: %s", i, err)
+			}
+			if izm.nOpenZips > nMaxOpenZips {
+				t.Errorf("Step %d: should be <=%d open zips", i, nMaxOpenZips)
+			}
+		}
+		if !managedZips[nInputZips-1].IsOpen() {
+			t.Error("The last input should stay open")
+		}
+		for i, iz := range managedZips {
+			if err := iz.Close(); err != nil {
+				t.Errorf("Step %d: close failed: %s", i, err)
+			}
+		}
+		if izm.nOpenZips > 0 {
+			t.Error("Some input zips are still open")
+		}
+	})
+}