blob: 8e71a978432326424e96b01f69cefd918fe58762 [file] [log] [blame]
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package main
16
17import (
Colin Cross635acc92017-09-12 22:50:46 -070018 "errors"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070019 "flag"
20 "fmt"
Colin Cross635acc92017-09-12 22:50:46 -070021 "hash/crc32"
Nan Zhang5925b0f2017-12-19 15:13:40 -080022 "io/ioutil"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070023 "log"
24 "os"
Nan Zhang13f4cf52017-09-19 18:42:01 -070025 "path/filepath"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070026 "sort"
Colin Cross4c03f682018-07-15 08:16:31 -070027
28 "github.com/google/blueprint/pathtools"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070029
30 "android/soong/jar"
31 "android/soong/third_party/zip"
32)
33
// fileList is a flag.Value that accumulates every occurrence of a repeated
// command-line flag as a cleaned path.
type fileList []string

// String implements flag.Value. It always yields a quoted empty string,
// regardless of what has been collected so far.
func (f *fileList) String() string {
	const placeholder = `""`
	return placeholder
}

// Set implements flag.Value. It appends the filepath.Clean form of name to the
// list and never fails.
func (f *fileList) Set(name string) error {
	cleaned := filepath.Clean(name)
	*f = append(*f, cleaned)
	return nil
}
45
// zipsToNotStripSet is a flag.Value that records, as map keys, the zip paths
// passed through a repeated command-line flag.
type zipsToNotStripSet map[string]bool

// String implements flag.Value. It always yields a quoted empty string.
func (s zipsToNotStripSet) String() string {
	const placeholder = `""`
	return placeholder
}

// Set implements flag.Value. It marks zip_path as present in the set; the path
// is stored exactly as given. It never fails.
func (s zipsToNotStripSet) Set(zip_path string) error {
	s[zip_path] = true
	return nil
}
57
Jeff Gaston8bab5f22017-09-01 13:34:28 -070058var (
Colin Crosse909e1e2017-11-22 14:09:40 -080059 sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
60 emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
Nan Zhang5925b0f2017-12-19 15:13:40 -080061 emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
Colin Crosse909e1e2017-11-22 14:09:40 -080062 stripDirs fileList
63 stripFiles fileList
64 zipsToNotStrip = make(zipsToNotStripSet)
65 stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
66 manifest = flag.String("m", "", "manifest file to insert in jar")
Nan Zhang1db85402017-12-18 13:20:23 -080067 pyMain = flag.String("pm", "", "__main__.py file to insert in par")
Nan Zhang5925b0f2017-12-19 15:13:40 -080068 entrypoint = flag.String("e", "", "par entrypoint file to insert in par")
Colin Crosse909e1e2017-11-22 14:09:40 -080069 ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070070)
71
Nan Zhangd5998cc2017-09-13 13:17:43 -070072func init() {
Colin Cross4c03f682018-07-15 08:16:31 -070073 flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
74 flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
Colin Cross0cf45cd2017-10-04 17:04:16 -070075 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
Nan Zhangd5998cc2017-09-13 13:17:43 -070076}
77
Jeff Gaston8bab5f22017-09-01 13:34:28 -070078func main() {
79 flag.Usage = func() {
Nan Zhang1db85402017-12-18 13:20:23 -080080 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [-e entrypoint] [-pm __main__.py] output [inputs...]")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070081 flag.PrintDefaults()
82 }
83
84 // parse args
85 flag.Parse()
86 args := flag.Args()
Colin Cross5c6ecc12017-10-23 18:12:27 -070087 if len(args) < 1 {
Jeff Gaston8bab5f22017-09-01 13:34:28 -070088 flag.Usage()
89 os.Exit(1)
90 }
91 outputPath := args[0]
92 inputs := args[1:]
93
94 log.SetFlags(log.Lshortfile)
95
96 // make writer
97 output, err := os.Create(outputPath)
98 if err != nil {
99 log.Fatal(err)
100 }
101 defer output.Close()
102 writer := zip.NewWriter(output)
103 defer func() {
104 err := writer.Close()
105 if err != nil {
106 log.Fatal(err)
107 }
108 }()
109
110 // make readers
111 readers := []namedZipReader{}
112 for _, input := range inputs {
113 reader, err := zip.OpenReader(input)
114 if err != nil {
115 log.Fatal(err)
116 }
117 defer reader.Close()
Colin Cross24860652018-07-14 22:19:14 -0700118 namedReader := namedZipReader{path: input, reader: &reader.Reader}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700119 readers = append(readers, namedReader)
120 }
121
Colin Cross635acc92017-09-12 22:50:46 -0700122 if *manifest != "" && !*emulateJar {
123 log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
124 }
125
Nan Zhang5925b0f2017-12-19 15:13:40 -0800126 if *entrypoint != "" && !*emulatePar {
127 log.Fatal(errors.New("must specify -p when specifying a entrypoint via -e"))
128 }
129
Nan Zhang1db85402017-12-18 13:20:23 -0800130 if *pyMain != "" && !*emulatePar {
131 log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
132 }
133
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700134 // do merge
Nan Zhang1db85402017-12-18 13:20:23 -0800135 err = mergeZips(readers, writer, *manifest, *entrypoint, *pyMain, *sortEntries, *emulateJar, *emulatePar,
Colin Cross24860652018-07-14 22:19:14 -0700136 *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
Colin Cross635acc92017-09-12 22:50:46 -0700137 if err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700138 log.Fatal(err)
139 }
140}
141
142// a namedZipReader reads a .zip file and can say which file it's reading
143type namedZipReader struct {
144 path string
Colin Cross24860652018-07-14 22:19:14 -0700145 reader *zip.Reader
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700146}
147
// zipEntryPath identifies one entry inside one zip file.
type zipEntryPath struct {
	// zipName is the path of the containing zip file.
	zipName string
	// entryName is the name of the entry within that zip.
	entryName string
}

// String renders the path as "<zipName>/<entryName>".
func (p zipEntryPath) String() string {
	return fmt.Sprintf("%s/%s", p.zipName, p.entryName)
}
157
Colin Cross635acc92017-09-12 22:50:46 -0700158// a zipEntry is a zipSource that pulls its content from another zip
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700159type zipEntry struct {
160 path zipEntryPath
161 content *zip.File
162}
163
Colin Cross635acc92017-09-12 22:50:46 -0700164func (ze zipEntry) String() string {
165 return ze.path.String()
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700166}
167
Colin Cross635acc92017-09-12 22:50:46 -0700168func (ze zipEntry) IsDir() bool {
169 return ze.content.FileInfo().IsDir()
170}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700171
Colin Cross635acc92017-09-12 22:50:46 -0700172func (ze zipEntry) CRC32() uint32 {
173 return ze.content.FileHeader.CRC32
174}
175
Colin Crossdc1e8292018-10-17 15:05:56 -0700176func (ze zipEntry) Size() uint64 {
177 return ze.content.FileHeader.UncompressedSize64
178}
179
Colin Cross635acc92017-09-12 22:50:46 -0700180func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
181 return zw.CopyFrom(ze.content, dest)
182}
183
// bufferEntry is a zipSource whose content lives in memory rather than in an
// input zip.
type bufferEntry struct {
	// fh is the header written for the entry.
	fh *zip.FileHeader
	// content holds the entry's bytes; it is not written for directories.
	content []byte
}

// String returns a fixed label, since a bufferEntry has no originating file.
func (be bufferEntry) String() string {
	const label = "internal buffer"
	return label
}

// IsDir reports whether the header describes a directory.
func (be bufferEntry) IsDir() bool {
	info := be.fh.FileInfo()
	return info.IsDir()
}

// CRC32 computes the IEEE CRC-32 of the buffered content.
func (be bufferEntry) CRC32() uint32 {
	return crc32.Checksum(be.content, crc32.IEEETable)
}

// Size returns the length of the buffered content in bytes.
func (be bufferEntry) Size() uint64 {
	n := len(be.content)
	return uint64(n)
}

// WriteToZip creates an entry in zw from the stored header and, unless the
// entry is a directory, writes the buffered content into it. The entry name
// comes from the header; dest is not consulted.
func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
	entryWriter, err := zw.CreateHeader(be.fh)
	if err != nil {
		return err
	}

	if be.IsDir() {
		return nil
	}

	_, err = entryWriter.Write(be.content)
	return err
}
221
222type zipSource interface {
223 String() string
224 IsDir() bool
225 CRC32() uint32
Colin Crossdc1e8292018-10-17 15:05:56 -0700226 Size() uint64
Colin Cross635acc92017-09-12 22:50:46 -0700227 WriteToZip(dest string, zw *zip.Writer) error
228}
229
230// a fileMapping specifies to copy a zip entry from one place to another
231type fileMapping struct {
232 dest string
233 source zipSource
234}
235
Nan Zhang1db85402017-12-18 13:20:23 -0800236func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, entrypoint, pyMain string,
Colin Cross24860652018-07-14 22:19:14 -0700237 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
238 stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
Colin Cross635acc92017-09-12 22:50:46 -0700239
240 sourceByDest := make(map[string]zipSource, 0)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700241 orderedMappings := []fileMapping{}
242
Colin Cross635acc92017-09-12 22:50:46 -0700243 // if dest already exists returns a non-null zipSource for the existing source
244 addMapping := func(dest string, source zipSource) zipSource {
245 mapKey := filepath.Clean(dest)
246 if existingSource, exists := sourceByDest[mapKey]; exists {
247 return existingSource
248 }
249
250 sourceByDest[mapKey] = source
251 orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
252 return nil
253 }
254
255 if manifest != "" {
256 if !stripDirEntries {
257 dirHeader := jar.MetaDirFileHeader()
258 dirSource := bufferEntry{dirHeader, nil}
259 addMapping(jar.MetaDir, dirSource)
260 }
261
Colin Cross05518bc2018-09-27 15:06:19 -0700262 contents, err := ioutil.ReadFile(manifest)
263 if err != nil {
264 return err
265 }
266
267 fh, buf, err := jar.ManifestFileContents(contents)
Colin Cross635acc92017-09-12 22:50:46 -0700268 if err != nil {
269 return err
270 }
271
272 fileSource := bufferEntry{fh, buf}
273 addMapping(jar.ManifestFile, fileSource)
274 }
275
Nan Zhang5925b0f2017-12-19 15:13:40 -0800276 if entrypoint != "" {
277 buf, err := ioutil.ReadFile(entrypoint)
278 if err != nil {
279 return err
280 }
281 fh := &zip.FileHeader{
282 Name: "entry_point.txt",
283 Method: zip.Store,
284 UncompressedSize64: uint64(len(buf)),
285 }
286 fh.SetMode(0700)
287 fh.SetModTime(jar.DefaultTime)
288 fileSource := bufferEntry{fh, buf}
289 addMapping("entry_point.txt", fileSource)
290 }
291
Nan Zhang1db85402017-12-18 13:20:23 -0800292 if pyMain != "" {
293 buf, err := ioutil.ReadFile(pyMain)
294 if err != nil {
295 return err
296 }
297 fh := &zip.FileHeader{
298 Name: "__main__.py",
299 Method: zip.Store,
300 UncompressedSize64: uint64(len(buf)),
301 }
302 fh.SetMode(0700)
303 fh.SetModTime(jar.DefaultTime)
304 fileSource := bufferEntry{fh, buf}
305 addMapping("__main__.py", fileSource)
306 }
307
Nan Zhang5925b0f2017-12-19 15:13:40 -0800308 if emulatePar {
309 // the runfiles packages needs to be populated with "__init__.py".
310 newPyPkgs := []string{}
311 // the runfiles dirs have been treated as packages.
312 existingPyPkgSet := make(map[string]bool)
313 // put existing __init__.py files to a set first. This set is used for preventing
314 // generated __init__.py files from overwriting existing ones.
315 for _, namedReader := range readers {
316 for _, file := range namedReader.reader.File {
317 if filepath.Base(file.Name) != "__init__.py" {
318 continue
319 }
320 pyPkg := pathBeforeLastSlash(file.Name)
321 if _, found := existingPyPkgSet[pyPkg]; found {
322 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
323 } else {
324 existingPyPkgSet[pyPkg] = true
325 }
326 }
327 }
328 for _, namedReader := range readers {
329 for _, file := range namedReader.reader.File {
330 var parentPath string /* the path after trimming last "/" */
331 if filepath.Base(file.Name) == "__init__.py" {
332 // for existing __init__.py files, we should trim last "/" for twice.
333 // eg. a/b/c/__init__.py ---> a/b
334 parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
335 } else {
336 parentPath = pathBeforeLastSlash(file.Name)
337 }
338 populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
339 }
340 }
341 for _, pkg := range newPyPkgs {
342 var emptyBuf []byte
343 fh := &zip.FileHeader{
344 Name: filepath.Join(pkg, "__init__.py"),
345 Method: zip.Store,
346 UncompressedSize64: uint64(len(emptyBuf)),
347 }
348 fh.SetMode(0700)
349 fh.SetModTime(jar.DefaultTime)
350 fileSource := bufferEntry{fh, emptyBuf}
351 addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
352 }
353 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700354 for _, namedReader := range readers {
Nan Zhang13f4cf52017-09-19 18:42:01 -0700355 _, skipStripThisZip := zipsToNotStrip[namedReader.path]
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700356 for _, file := range namedReader.reader.File {
Colin Cross4c03f682018-07-15 08:16:31 -0700357 if !skipStripThisZip {
358 if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
359 return err
360 } else if skip {
361 continue
362 }
Nan Zhangd5998cc2017-09-13 13:17:43 -0700363 }
Colin Cross635acc92017-09-12 22:50:46 -0700364
365 if stripDirEntries && file.FileInfo().IsDir() {
366 continue
367 }
368
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700369 // check for other files or directories destined for the same path
370 dest := file.Name
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700371
372 // make a new entry to add
373 source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700374
Colin Cross635acc92017-09-12 22:50:46 -0700375 if existingSource := addMapping(dest, source); existingSource != nil {
Colin Cross34540312017-09-06 12:52:37 -0700376 // handle duplicates
Colin Cross635acc92017-09-12 22:50:46 -0700377 if existingSource.IsDir() != source.IsDir() {
Colin Cross34540312017-09-06 12:52:37 -0700378 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700379 dest, existingSource, source)
Colin Cross34540312017-09-06 12:52:37 -0700380 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700381
Colin Crosse909e1e2017-11-22 14:09:40 -0800382 if ignoreDuplicates {
383 continue
384 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700385
Colin Cross34540312017-09-06 12:52:37 -0700386 if emulateJar &&
387 file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
388 // Skip manifest and module info files that are not from the first input file
389 continue
390 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700391
392 if source.IsDir() {
393 continue
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700394 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700395
396 if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
397 continue
398 }
399
400 return fmt.Errorf("Duplicate path %v found in %v and %v\n",
401 dest, existingSource, source)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700402 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700403 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700404 }
405
Colin Cross34540312017-09-06 12:52:37 -0700406 if emulateJar {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700407 jarSort(orderedMappings)
408 } else if sortEntries {
409 alphanumericSort(orderedMappings)
410 }
411
412 for _, entry := range orderedMappings {
Colin Cross635acc92017-09-12 22:50:46 -0700413 if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700414 return err
415 }
416 }
417
418 return nil
419}
420
Nan Zhang5925b0f2017-12-19 15:13:40 -0800421// Sets the given directory and all its ancestor directories as Python packages.
422func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
423 for pkgPath != "" {
424 if _, found := existingPyPkgSet[pkgPath]; !found {
425 existingPyPkgSet[pkgPath] = true
426 *newPyPkgs = append(*newPyPkgs, pkgPath)
427 // Gets its ancestor directory by trimming last slash.
428 pkgPath = pathBeforeLastSlash(pkgPath)
429 } else {
430 break
431 }
432 }
433}
434
// pathBeforeLastSlash returns the directory part of path, or "" when path has
// no parent directory other than the current directory or the root.
func pathBeforeLastSlash(path string) string {
	switch dir := filepath.Dir(path); dir {
	case ".", "/":
		// filepath.Dir("abc") is "." and filepath.Dir("/abc") is "/".
		return ""
	default:
		return dir
	}
}
443
Colin Cross4c03f682018-07-15 08:16:31 -0700444func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
Colin Cross0cf45cd2017-10-04 17:04:16 -0700445 for _, dir := range stripDirs {
Colin Cross4c03f682018-07-15 08:16:31 -0700446 dir = filepath.Clean(dir)
447 patterns := []string{
448 dir + "/", // the directory itself
449 dir + "/**/*", // files recursively in the directory
450 dir + "/**/*/", // directories recursively in the directory
451 }
452
453 for _, pattern := range patterns {
454 match, err := pathtools.Match(pattern, name)
455 if err != nil {
456 return false, fmt.Errorf("%s: %s", err.Error(), pattern)
457 } else if match {
458 if emulateJar {
459 // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
460 // requested.
461 // TODO(ccross): which files does this affect?
462 if name != jar.MetaDir && name != jar.ManifestFile {
463 return true, nil
464 }
Colin Cross0cf45cd2017-10-04 17:04:16 -0700465 }
Colin Cross4c03f682018-07-15 08:16:31 -0700466 return true, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700467 }
468 }
469 }
Colin Cross4c03f682018-07-15 08:16:31 -0700470
Colin Cross0cf45cd2017-10-04 17:04:16 -0700471 for _, pattern := range stripFiles {
Colin Cross4c03f682018-07-15 08:16:31 -0700472 if match, err := pathtools.Match(pattern, name); err != nil {
473 return false, fmt.Errorf("%s: %s", err.Error(), pattern)
Colin Cross0cf45cd2017-10-04 17:04:16 -0700474 } else if match {
Colin Cross4c03f682018-07-15 08:16:31 -0700475 return true, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700476 }
477 }
Colin Cross4c03f682018-07-15 08:16:31 -0700478 return false, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700479}
480
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700481func jarSort(files []fileMapping) {
482 sort.SliceStable(files, func(i, j int) bool {
483 return jar.EntryNamesLess(files[i].dest, files[j].dest)
484 })
485}
486
487func alphanumericSort(files []fileMapping) {
488 sort.SliceStable(files, func(i, j int) bool {
489 return files[i].dest < files[j].dest
490 })
491}