blob: c21da44b7dbc4d4d352ac5c0954e18f914712768 [file] [log] [blame]
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package main
16
17import (
Colin Cross635acc92017-09-12 22:50:46 -070018 "errors"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070019 "flag"
20 "fmt"
Colin Cross635acc92017-09-12 22:50:46 -070021 "hash/crc32"
Dan Willemsen263dde72018-11-15 19:15:02 -080022 "io"
Nan Zhang5925b0f2017-12-19 15:13:40 -080023 "io/ioutil"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070024 "log"
25 "os"
Nan Zhang13f4cf52017-09-19 18:42:01 -070026 "path/filepath"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070027 "sort"
Colin Cross4c03f682018-07-15 08:16:31 -070028
29 "github.com/google/blueprint/pathtools"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070030
31 "android/soong/jar"
32 "android/soong/third_party/zip"
33)
34
// fileList is a repeatable command-line flag value that accumulates paths.
// It satisfies flag.Value through its pointer receiver methods.
type fileList []string

// String implements flag.Value. It always reports a pair of double quotes,
// regardless of what has been accumulated.
func (f *fileList) String() string {
	return `""`
}

// Set implements flag.Value. Each occurrence of the flag appends name, after
// normalizing it with filepath.Clean, to the list. It never fails.
func (f *fileList) Set(name string) error {
	cleaned := filepath.Clean(name)
	*f = append(*f, cleaned)
	return nil
}
46
// zipsToNotStripSet is a repeatable command-line flag value that records a
// set of zip paths. Membership is keyed on the exact string passed on the
// command line; no path normalization is applied.
//
// The map must be initialized (e.g. with make) before Set is called, because
// writing to a nil map panics.
type zipsToNotStripSet map[string]bool

// String implements flag.Value. It always reports a pair of double quotes,
// regardless of the set's contents.
func (s zipsToNotStripSet) String() string {
	return `""`
}

// Set implements flag.Value by adding zipPath to the set. It never fails.
func (s zipsToNotStripSet) Set(zipPath string) error {
	s[zipPath] = true
	return nil
}
58
Jeff Gaston8bab5f22017-09-01 13:34:28 -070059var (
Colin Crosse909e1e2017-11-22 14:09:40 -080060 sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
61 emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
Nan Zhang5925b0f2017-12-19 15:13:40 -080062 emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
Colin Crosse909e1e2017-11-22 14:09:40 -080063 stripDirs fileList
64 stripFiles fileList
65 zipsToNotStrip = make(zipsToNotStripSet)
66 stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
67 manifest = flag.String("m", "", "manifest file to insert in jar")
Nan Zhang1db85402017-12-18 13:20:23 -080068 pyMain = flag.String("pm", "", "__main__.py file to insert in par")
Nan Zhang5925b0f2017-12-19 15:13:40 -080069 entrypoint = flag.String("e", "", "par entrypoint file to insert in par")
Dan Willemsen263dde72018-11-15 19:15:02 -080070 prefix = flag.String("prefix", "", "A file to prefix to the zip file")
Colin Crosse909e1e2017-11-22 14:09:40 -080071 ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070072)
73
Nan Zhangd5998cc2017-09-13 13:17:43 -070074func init() {
Colin Cross4c03f682018-07-15 08:16:31 -070075 flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
76 flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
Colin Cross0cf45cd2017-10-04 17:04:16 -070077 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
Nan Zhangd5998cc2017-09-13 13:17:43 -070078}
79
Jeff Gaston8bab5f22017-09-01 13:34:28 -070080func main() {
81 flag.Usage = func() {
Dan Willemsen263dde72018-11-15 19:15:02 -080082 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-e entrypoint] [-pm __main__.py] output [inputs...]")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070083 flag.PrintDefaults()
84 }
85
86 // parse args
87 flag.Parse()
88 args := flag.Args()
Colin Cross5c6ecc12017-10-23 18:12:27 -070089 if len(args) < 1 {
Jeff Gaston8bab5f22017-09-01 13:34:28 -070090 flag.Usage()
91 os.Exit(1)
92 }
93 outputPath := args[0]
94 inputs := args[1:]
95
96 log.SetFlags(log.Lshortfile)
97
98 // make writer
99 output, err := os.Create(outputPath)
100 if err != nil {
101 log.Fatal(err)
102 }
103 defer output.Close()
Dan Willemsen263dde72018-11-15 19:15:02 -0800104
105 var offset int64
106 if *prefix != "" {
107 prefixFile, err := os.Open(*prefix)
108 if err != nil {
109 log.Fatal(err)
110 }
111 offset, err = io.Copy(output, prefixFile)
112 if err != nil {
113 log.Fatal(err)
114 }
115 }
116
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700117 writer := zip.NewWriter(output)
118 defer func() {
119 err := writer.Close()
120 if err != nil {
121 log.Fatal(err)
122 }
123 }()
Dan Willemsen263dde72018-11-15 19:15:02 -0800124 writer.SetOffset(offset)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700125
126 // make readers
127 readers := []namedZipReader{}
128 for _, input := range inputs {
129 reader, err := zip.OpenReader(input)
130 if err != nil {
131 log.Fatal(err)
132 }
133 defer reader.Close()
Colin Cross24860652018-07-14 22:19:14 -0700134 namedReader := namedZipReader{path: input, reader: &reader.Reader}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700135 readers = append(readers, namedReader)
136 }
137
Colin Cross635acc92017-09-12 22:50:46 -0700138 if *manifest != "" && !*emulateJar {
139 log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
140 }
141
Nan Zhang5925b0f2017-12-19 15:13:40 -0800142 if *entrypoint != "" && !*emulatePar {
143 log.Fatal(errors.New("must specify -p when specifying a entrypoint via -e"))
144 }
145
Nan Zhang1db85402017-12-18 13:20:23 -0800146 if *pyMain != "" && !*emulatePar {
147 log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
148 }
149
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700150 // do merge
Nan Zhang1db85402017-12-18 13:20:23 -0800151 err = mergeZips(readers, writer, *manifest, *entrypoint, *pyMain, *sortEntries, *emulateJar, *emulatePar,
Colin Cross24860652018-07-14 22:19:14 -0700152 *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
Colin Cross635acc92017-09-12 22:50:46 -0700153 if err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700154 log.Fatal(err)
155 }
156}
157
158// a namedZipReader reads a .zip file and can say which file it's reading
159type namedZipReader struct {
160 path string
Colin Cross24860652018-07-14 22:19:14 -0700161 reader *zip.Reader
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700162}
163
// zipEntryPath identifies a single entry inside a particular zip file.
type zipEntryPath struct {
	// zipName is the name of the zip file that holds the entry.
	zipName string
	// entryName is the entry's name within that zip file.
	entryName string
}

// String renders the location as the zip name and the entry name joined by a
// single slash.
func (p zipEntryPath) String() string {
	const separator = "/"
	return p.zipName + separator + p.entryName
}
173
Colin Cross635acc92017-09-12 22:50:46 -0700174// a zipEntry is a zipSource that pulls its content from another zip
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700175type zipEntry struct {
176 path zipEntryPath
177 content *zip.File
178}
179
Colin Cross635acc92017-09-12 22:50:46 -0700180func (ze zipEntry) String() string {
181 return ze.path.String()
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700182}
183
Colin Cross635acc92017-09-12 22:50:46 -0700184func (ze zipEntry) IsDir() bool {
185 return ze.content.FileInfo().IsDir()
186}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700187
Colin Cross635acc92017-09-12 22:50:46 -0700188func (ze zipEntry) CRC32() uint32 {
189 return ze.content.FileHeader.CRC32
190}
191
Colin Crossdc1e8292018-10-17 15:05:56 -0700192func (ze zipEntry) Size() uint64 {
193 return ze.content.FileHeader.UncompressedSize64
194}
195
Colin Cross635acc92017-09-12 22:50:46 -0700196func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
197 return zw.CopyFrom(ze.content, dest)
198}
199
200// a bufferEntry is a zipSource that pulls its content from a []byte
201type bufferEntry struct {
202 fh *zip.FileHeader
203 content []byte
204}
205
206func (be bufferEntry) String() string {
207 return "internal buffer"
208}
209
210func (be bufferEntry) IsDir() bool {
211 return be.fh.FileInfo().IsDir()
212}
213
214func (be bufferEntry) CRC32() uint32 {
215 return crc32.ChecksumIEEE(be.content)
216}
217
Colin Crossdc1e8292018-10-17 15:05:56 -0700218func (be bufferEntry) Size() uint64 {
219 return uint64(len(be.content))
220}
221
Colin Cross635acc92017-09-12 22:50:46 -0700222func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
223 w, err := zw.CreateHeader(be.fh)
224 if err != nil {
225 return err
226 }
227
228 if !be.IsDir() {
229 _, err = w.Write(be.content)
230 if err != nil {
231 return err
232 }
233 }
234
235 return nil
236}
237
238type zipSource interface {
239 String() string
240 IsDir() bool
241 CRC32() uint32
Colin Crossdc1e8292018-10-17 15:05:56 -0700242 Size() uint64
Colin Cross635acc92017-09-12 22:50:46 -0700243 WriteToZip(dest string, zw *zip.Writer) error
244}
245
246// a fileMapping specifies to copy a zip entry from one place to another
247type fileMapping struct {
248 dest string
249 source zipSource
250}
251
Nan Zhang1db85402017-12-18 13:20:23 -0800252func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, entrypoint, pyMain string,
Colin Cross24860652018-07-14 22:19:14 -0700253 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
254 stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
Colin Cross635acc92017-09-12 22:50:46 -0700255
256 sourceByDest := make(map[string]zipSource, 0)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700257 orderedMappings := []fileMapping{}
258
Colin Cross635acc92017-09-12 22:50:46 -0700259 // if dest already exists returns a non-null zipSource for the existing source
260 addMapping := func(dest string, source zipSource) zipSource {
261 mapKey := filepath.Clean(dest)
262 if existingSource, exists := sourceByDest[mapKey]; exists {
263 return existingSource
264 }
265
266 sourceByDest[mapKey] = source
267 orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
268 return nil
269 }
270
271 if manifest != "" {
272 if !stripDirEntries {
273 dirHeader := jar.MetaDirFileHeader()
274 dirSource := bufferEntry{dirHeader, nil}
275 addMapping(jar.MetaDir, dirSource)
276 }
277
Colin Cross05518bc2018-09-27 15:06:19 -0700278 contents, err := ioutil.ReadFile(manifest)
279 if err != nil {
280 return err
281 }
282
283 fh, buf, err := jar.ManifestFileContents(contents)
Colin Cross635acc92017-09-12 22:50:46 -0700284 if err != nil {
285 return err
286 }
287
288 fileSource := bufferEntry{fh, buf}
289 addMapping(jar.ManifestFile, fileSource)
290 }
291
Nan Zhang5925b0f2017-12-19 15:13:40 -0800292 if entrypoint != "" {
293 buf, err := ioutil.ReadFile(entrypoint)
294 if err != nil {
295 return err
296 }
297 fh := &zip.FileHeader{
298 Name: "entry_point.txt",
299 Method: zip.Store,
300 UncompressedSize64: uint64(len(buf)),
301 }
302 fh.SetMode(0700)
303 fh.SetModTime(jar.DefaultTime)
304 fileSource := bufferEntry{fh, buf}
305 addMapping("entry_point.txt", fileSource)
306 }
307
Nan Zhang1db85402017-12-18 13:20:23 -0800308 if pyMain != "" {
309 buf, err := ioutil.ReadFile(pyMain)
310 if err != nil {
311 return err
312 }
313 fh := &zip.FileHeader{
314 Name: "__main__.py",
315 Method: zip.Store,
316 UncompressedSize64: uint64(len(buf)),
317 }
318 fh.SetMode(0700)
319 fh.SetModTime(jar.DefaultTime)
320 fileSource := bufferEntry{fh, buf}
321 addMapping("__main__.py", fileSource)
322 }
323
Nan Zhang5925b0f2017-12-19 15:13:40 -0800324 if emulatePar {
325 // the runfiles packages needs to be populated with "__init__.py".
326 newPyPkgs := []string{}
327 // the runfiles dirs have been treated as packages.
328 existingPyPkgSet := make(map[string]bool)
329 // put existing __init__.py files to a set first. This set is used for preventing
330 // generated __init__.py files from overwriting existing ones.
331 for _, namedReader := range readers {
332 for _, file := range namedReader.reader.File {
333 if filepath.Base(file.Name) != "__init__.py" {
334 continue
335 }
336 pyPkg := pathBeforeLastSlash(file.Name)
337 if _, found := existingPyPkgSet[pyPkg]; found {
338 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
339 } else {
340 existingPyPkgSet[pyPkg] = true
341 }
342 }
343 }
344 for _, namedReader := range readers {
345 for _, file := range namedReader.reader.File {
346 var parentPath string /* the path after trimming last "/" */
347 if filepath.Base(file.Name) == "__init__.py" {
348 // for existing __init__.py files, we should trim last "/" for twice.
349 // eg. a/b/c/__init__.py ---> a/b
350 parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
351 } else {
352 parentPath = pathBeforeLastSlash(file.Name)
353 }
354 populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
355 }
356 }
357 for _, pkg := range newPyPkgs {
358 var emptyBuf []byte
359 fh := &zip.FileHeader{
360 Name: filepath.Join(pkg, "__init__.py"),
361 Method: zip.Store,
362 UncompressedSize64: uint64(len(emptyBuf)),
363 }
364 fh.SetMode(0700)
365 fh.SetModTime(jar.DefaultTime)
366 fileSource := bufferEntry{fh, emptyBuf}
367 addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
368 }
369 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700370 for _, namedReader := range readers {
Nan Zhang13f4cf52017-09-19 18:42:01 -0700371 _, skipStripThisZip := zipsToNotStrip[namedReader.path]
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700372 for _, file := range namedReader.reader.File {
Colin Cross4c03f682018-07-15 08:16:31 -0700373 if !skipStripThisZip {
374 if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
375 return err
376 } else if skip {
377 continue
378 }
Nan Zhangd5998cc2017-09-13 13:17:43 -0700379 }
Colin Cross635acc92017-09-12 22:50:46 -0700380
381 if stripDirEntries && file.FileInfo().IsDir() {
382 continue
383 }
384
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700385 // check for other files or directories destined for the same path
386 dest := file.Name
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700387
388 // make a new entry to add
389 source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700390
Colin Cross635acc92017-09-12 22:50:46 -0700391 if existingSource := addMapping(dest, source); existingSource != nil {
Colin Cross34540312017-09-06 12:52:37 -0700392 // handle duplicates
Colin Cross635acc92017-09-12 22:50:46 -0700393 if existingSource.IsDir() != source.IsDir() {
Colin Cross34540312017-09-06 12:52:37 -0700394 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700395 dest, existingSource, source)
Colin Cross34540312017-09-06 12:52:37 -0700396 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700397
Colin Crosse909e1e2017-11-22 14:09:40 -0800398 if ignoreDuplicates {
399 continue
400 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700401
Colin Cross34540312017-09-06 12:52:37 -0700402 if emulateJar &&
403 file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
404 // Skip manifest and module info files that are not from the first input file
405 continue
406 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700407
408 if source.IsDir() {
409 continue
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700410 }
Colin Crossdc1e8292018-10-17 15:05:56 -0700411
412 if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
413 continue
414 }
415
416 return fmt.Errorf("Duplicate path %v found in %v and %v\n",
417 dest, existingSource, source)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700418 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700419 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700420 }
421
Colin Cross34540312017-09-06 12:52:37 -0700422 if emulateJar {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700423 jarSort(orderedMappings)
424 } else if sortEntries {
425 alphanumericSort(orderedMappings)
426 }
427
428 for _, entry := range orderedMappings {
Colin Cross635acc92017-09-12 22:50:46 -0700429 if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700430 return err
431 }
432 }
433
434 return nil
435}
436
Nan Zhang5925b0f2017-12-19 15:13:40 -0800437// Sets the given directory and all its ancestor directories as Python packages.
438func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
439 for pkgPath != "" {
440 if _, found := existingPyPkgSet[pkgPath]; !found {
441 existingPyPkgSet[pkgPath] = true
442 *newPyPkgs = append(*newPyPkgs, pkgPath)
443 // Gets its ancestor directory by trimming last slash.
444 pkgPath = pathBeforeLastSlash(pkgPath)
445 } else {
446 break
447 }
448 }
449}
450
// pathBeforeLastSlash returns the directory portion of path as computed by
// filepath.Dir, or the empty string when there is no parent directory to
// speak of (filepath.Dir yields "." for a bare name and "/" for a name
// directly under the root).
func pathBeforeLastSlash(path string) string {
	switch parent := filepath.Dir(path); parent {
	case ".", "/":
		return ""
	default:
		return parent
	}
}
459
Colin Cross4c03f682018-07-15 08:16:31 -0700460func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
Colin Cross0cf45cd2017-10-04 17:04:16 -0700461 for _, dir := range stripDirs {
Colin Cross4c03f682018-07-15 08:16:31 -0700462 dir = filepath.Clean(dir)
463 patterns := []string{
464 dir + "/", // the directory itself
465 dir + "/**/*", // files recursively in the directory
466 dir + "/**/*/", // directories recursively in the directory
467 }
468
469 for _, pattern := range patterns {
470 match, err := pathtools.Match(pattern, name)
471 if err != nil {
472 return false, fmt.Errorf("%s: %s", err.Error(), pattern)
473 } else if match {
474 if emulateJar {
475 // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
476 // requested.
477 // TODO(ccross): which files does this affect?
478 if name != jar.MetaDir && name != jar.ManifestFile {
479 return true, nil
480 }
Colin Cross0cf45cd2017-10-04 17:04:16 -0700481 }
Colin Cross4c03f682018-07-15 08:16:31 -0700482 return true, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700483 }
484 }
485 }
Colin Cross4c03f682018-07-15 08:16:31 -0700486
Colin Cross0cf45cd2017-10-04 17:04:16 -0700487 for _, pattern := range stripFiles {
Colin Cross4c03f682018-07-15 08:16:31 -0700488 if match, err := pathtools.Match(pattern, name); err != nil {
489 return false, fmt.Errorf("%s: %s", err.Error(), pattern)
Colin Cross0cf45cd2017-10-04 17:04:16 -0700490 } else if match {
Colin Cross4c03f682018-07-15 08:16:31 -0700491 return true, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700492 }
493 }
Colin Cross4c03f682018-07-15 08:16:31 -0700494 return false, nil
Colin Cross0cf45cd2017-10-04 17:04:16 -0700495}
496
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700497func jarSort(files []fileMapping) {
498 sort.SliceStable(files, func(i, j int) bool {
499 return jar.EntryNamesLess(files[i].dest, files[j].dest)
500 })
501}
502
503func alphanumericSort(files []fileMapping) {
504 sort.SliceStable(files, func(i, j int) bool {
505 return files[i].dest < files[j].dest
506 })
507}