blob: 84eace608c6fbc1e8a570964b49a7bb2b44823a7 [file] [log] [blame]
Jeff Gaston8bab5f22017-09-01 13:34:28 -07001// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
Colin Cross635acc92017-09-12 22:50:46 -070018 "errors"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070019 "flag"
20 "fmt"
Colin Cross635acc92017-09-12 22:50:46 -070021 "hash/crc32"
Nan Zhang5925b0f2017-12-19 15:13:40 -080022 "io/ioutil"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070023 "log"
24 "os"
Nan Zhang13f4cf52017-09-19 18:42:01 -070025 "path/filepath"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070026 "sort"
27 "strings"
28
29 "android/soong/jar"
30 "android/soong/third_party/zip"
31)
32
// fileList is a repeatable command-line flag value that accumulates
// path arguments. It implements flag.Value.
type fileList []string

// String implements flag.Value. It always reports a literal pair of double
// quotes rather than the accumulated contents.
func (f *fileList) String() string {
	return `""`
}

// Set implements flag.Value. Each occurrence of the flag appends the cleaned
// form of name (see filepath.Clean) to the list. It never fails.
func (f *fileList) Set(name string) error {
	cleaned := filepath.Clean(name)
	*f = append(*f, cleaned)
	return nil
}
44
// zipsToNotStripSet is a repeatable command-line flag value that records a
// set of zip paths. It implements flag.Value; the map must be allocated
// before Set is called.
type zipsToNotStripSet map[string]bool

// String implements flag.Value. It always reports a literal pair of double
// quotes rather than the set contents.
func (s zipsToNotStripSet) String() string {
	return `""`
}

// Set implements flag.Value by marking zipPath as a member of the set.
// The path is stored exactly as given. It never fails.
func (s zipsToNotStripSet) Set(zipPath string) error {
	s[zipPath] = true
	return nil
}
56
// Command-line flags. The pointer-valued entries are registered here via the
// flag package; stripDirs, stripFiles and zipsToNotStrip are custom
// flag.Value implementations that are registered in init.
var (
	sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
	emulateJar  = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
	emulatePar  = flag.Bool("p", false, "merge zip entries based on par format")
	// Repeatable flags; each occurrence appends one value.
	stripDirs  fileList
	stripFiles fileList
	// Allocated up front because its Set method writes into the map.
	zipsToNotStrip   = make(zipsToNotStripSet)
	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
	manifest         = flag.String("m", "", "manifest file to insert in jar")
	pyMain           = flag.String("pm", "", "__main__.py file to insert in par")
	entrypoint       = flag.String("e", "", "par entrypoint file to insert in par")
	ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
)
70
Nan Zhangd5998cc2017-09-13 13:17:43 -070071func init() {
Colin Cross0cf45cd2017-10-04 17:04:16 -070072 flag.Var(&stripDirs, "stripDir", "the prefix of file path to be excluded from the output zip")
73 flag.Var(&stripFiles, "stripFile", "filenames to be excluded from the output zip, accepts wildcards")
74 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
Nan Zhangd5998cc2017-09-13 13:17:43 -070075}
76
Jeff Gaston8bab5f22017-09-01 13:34:28 -070077func main() {
78 flag.Usage = func() {
Nan Zhang1db85402017-12-18 13:20:23 -080079 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [-e entrypoint] [-pm __main__.py] output [inputs...]")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070080 flag.PrintDefaults()
81 }
82
83 // parse args
84 flag.Parse()
85 args := flag.Args()
Colin Cross5c6ecc12017-10-23 18:12:27 -070086 if len(args) < 1 {
Jeff Gaston8bab5f22017-09-01 13:34:28 -070087 flag.Usage()
88 os.Exit(1)
89 }
90 outputPath := args[0]
91 inputs := args[1:]
92
93 log.SetFlags(log.Lshortfile)
94
95 // make writer
96 output, err := os.Create(outputPath)
97 if err != nil {
98 log.Fatal(err)
99 }
100 defer output.Close()
101 writer := zip.NewWriter(output)
102 defer func() {
103 err := writer.Close()
104 if err != nil {
105 log.Fatal(err)
106 }
107 }()
108
109 // make readers
110 readers := []namedZipReader{}
111 for _, input := range inputs {
112 reader, err := zip.OpenReader(input)
113 if err != nil {
114 log.Fatal(err)
115 }
116 defer reader.Close()
Colin Cross24860652018-07-14 22:19:14 -0700117 namedReader := namedZipReader{path: input, reader: &reader.Reader}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700118 readers = append(readers, namedReader)
119 }
120
Colin Cross635acc92017-09-12 22:50:46 -0700121 if *manifest != "" && !*emulateJar {
122 log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
123 }
124
Nan Zhang5925b0f2017-12-19 15:13:40 -0800125 if *entrypoint != "" && !*emulatePar {
126 log.Fatal(errors.New("must specify -p when specifying a entrypoint via -e"))
127 }
128
Nan Zhang1db85402017-12-18 13:20:23 -0800129 if *pyMain != "" && !*emulatePar {
130 log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
131 }
132
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700133 // do merge
Nan Zhang1db85402017-12-18 13:20:23 -0800134 err = mergeZips(readers, writer, *manifest, *entrypoint, *pyMain, *sortEntries, *emulateJar, *emulatePar,
Colin Cross24860652018-07-14 22:19:14 -0700135 *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
Colin Cross635acc92017-09-12 22:50:46 -0700136 if err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700137 log.Fatal(err)
138 }
139}
140
// a namedZipReader pairs an opened zip reader with the path it was opened
// from, so that entries can be attributed to their input file.
type namedZipReader struct {
	// path is the input path as given on the command line; mergeZips uses it
	// to look the zip up in zipsToNotStrip and to label entries in messages.
	path string
	// reader provides the entries of the input zip.
	reader *zip.Reader
}
146
// a zipEntryPath identifies one entry inside one zip file.
type zipEntryPath struct {
	// zipName is the path of the containing zip file.
	zipName string
	// entryName is the name of the entry within that zip.
	entryName string
}

// String renders the location as "<zipName>/<entryName>".
func (zp zipEntryPath) String() string {
	const separator = "/"
	return zp.zipName + separator + zp.entryName
}
156
Colin Cross635acc92017-09-12 22:50:46 -0700157// a zipEntry is a zipSource that pulls its content from another zip
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700158type zipEntry struct {
159 path zipEntryPath
160 content *zip.File
161}
162
Colin Cross635acc92017-09-12 22:50:46 -0700163func (ze zipEntry) String() string {
164 return ze.path.String()
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700165}
166
Colin Cross635acc92017-09-12 22:50:46 -0700167func (ze zipEntry) IsDir() bool {
168 return ze.content.FileInfo().IsDir()
169}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700170
Colin Cross635acc92017-09-12 22:50:46 -0700171func (ze zipEntry) CRC32() uint32 {
172 return ze.content.FileHeader.CRC32
173}
174
175func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
176 return zw.CopyFrom(ze.content, dest)
177}
178
179// a bufferEntry is a zipSource that pulls its content from a []byte
180type bufferEntry struct {
181 fh *zip.FileHeader
182 content []byte
183}
184
185func (be bufferEntry) String() string {
186 return "internal buffer"
187}
188
189func (be bufferEntry) IsDir() bool {
190 return be.fh.FileInfo().IsDir()
191}
192
193func (be bufferEntry) CRC32() uint32 {
194 return crc32.ChecksumIEEE(be.content)
195}
196
197func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
198 w, err := zw.CreateHeader(be.fh)
199 if err != nil {
200 return err
201 }
202
203 if !be.IsDir() {
204 _, err = w.Write(be.content)
205 if err != nil {
206 return err
207 }
208 }
209
210 return nil
211}
212
// a zipSource is something that can be written into the output zip as a
// single entry. It is implemented by zipEntry and bufferEntry.
type zipSource interface {
	// String describes where the content comes from, for diagnostics.
	String() string
	// IsDir reports whether the source is a directory entry.
	IsDir() bool
	// CRC32 returns the checksum of the source's content.
	CRC32() uint32
	// WriteToZip writes the source into zw; dest is the intended entry name.
	WriteToZip(dest string, zw *zip.Writer) error
}
219
// a fileMapping specifies to copy a zip entry from one place to another
type fileMapping struct {
	// dest is the entry name in the output zip.
	dest string
	// source supplies the content to write at dest.
	source zipSource
}
225
Nan Zhang1db85402017-12-18 13:20:23 -0800226func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, entrypoint, pyMain string,
Colin Cross24860652018-07-14 22:19:14 -0700227 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
228 stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
Colin Cross635acc92017-09-12 22:50:46 -0700229
230 sourceByDest := make(map[string]zipSource, 0)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700231 orderedMappings := []fileMapping{}
232
Colin Cross635acc92017-09-12 22:50:46 -0700233 // if dest already exists returns a non-null zipSource for the existing source
234 addMapping := func(dest string, source zipSource) zipSource {
235 mapKey := filepath.Clean(dest)
236 if existingSource, exists := sourceByDest[mapKey]; exists {
237 return existingSource
238 }
239
240 sourceByDest[mapKey] = source
241 orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
242 return nil
243 }
244
245 if manifest != "" {
246 if !stripDirEntries {
247 dirHeader := jar.MetaDirFileHeader()
248 dirSource := bufferEntry{dirHeader, nil}
249 addMapping(jar.MetaDir, dirSource)
250 }
251
252 fh, buf, err := jar.ManifestFileContents(manifest)
253 if err != nil {
254 return err
255 }
256
257 fileSource := bufferEntry{fh, buf}
258 addMapping(jar.ManifestFile, fileSource)
259 }
260
Nan Zhang5925b0f2017-12-19 15:13:40 -0800261 if entrypoint != "" {
262 buf, err := ioutil.ReadFile(entrypoint)
263 if err != nil {
264 return err
265 }
266 fh := &zip.FileHeader{
267 Name: "entry_point.txt",
268 Method: zip.Store,
269 UncompressedSize64: uint64(len(buf)),
270 }
271 fh.SetMode(0700)
272 fh.SetModTime(jar.DefaultTime)
273 fileSource := bufferEntry{fh, buf}
274 addMapping("entry_point.txt", fileSource)
275 }
276
Nan Zhang1db85402017-12-18 13:20:23 -0800277 if pyMain != "" {
278 buf, err := ioutil.ReadFile(pyMain)
279 if err != nil {
280 return err
281 }
282 fh := &zip.FileHeader{
283 Name: "__main__.py",
284 Method: zip.Store,
285 UncompressedSize64: uint64(len(buf)),
286 }
287 fh.SetMode(0700)
288 fh.SetModTime(jar.DefaultTime)
289 fileSource := bufferEntry{fh, buf}
290 addMapping("__main__.py", fileSource)
291 }
292
Nan Zhang5925b0f2017-12-19 15:13:40 -0800293 if emulatePar {
294 // the runfiles packages needs to be populated with "__init__.py".
295 newPyPkgs := []string{}
296 // the runfiles dirs have been treated as packages.
297 existingPyPkgSet := make(map[string]bool)
298 // put existing __init__.py files to a set first. This set is used for preventing
299 // generated __init__.py files from overwriting existing ones.
300 for _, namedReader := range readers {
301 for _, file := range namedReader.reader.File {
302 if filepath.Base(file.Name) != "__init__.py" {
303 continue
304 }
305 pyPkg := pathBeforeLastSlash(file.Name)
306 if _, found := existingPyPkgSet[pyPkg]; found {
307 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
308 } else {
309 existingPyPkgSet[pyPkg] = true
310 }
311 }
312 }
313 for _, namedReader := range readers {
314 for _, file := range namedReader.reader.File {
315 var parentPath string /* the path after trimming last "/" */
316 if filepath.Base(file.Name) == "__init__.py" {
317 // for existing __init__.py files, we should trim last "/" for twice.
318 // eg. a/b/c/__init__.py ---> a/b
319 parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
320 } else {
321 parentPath = pathBeforeLastSlash(file.Name)
322 }
323 populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
324 }
325 }
326 for _, pkg := range newPyPkgs {
327 var emptyBuf []byte
328 fh := &zip.FileHeader{
329 Name: filepath.Join(pkg, "__init__.py"),
330 Method: zip.Store,
331 UncompressedSize64: uint64(len(emptyBuf)),
332 }
333 fh.SetMode(0700)
334 fh.SetModTime(jar.DefaultTime)
335 fileSource := bufferEntry{fh, emptyBuf}
336 addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
337 }
338 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700339 for _, namedReader := range readers {
Nan Zhang13f4cf52017-09-19 18:42:01 -0700340 _, skipStripThisZip := zipsToNotStrip[namedReader.path]
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700341 for _, file := range namedReader.reader.File {
Colin Cross24860652018-07-14 22:19:14 -0700342 if !skipStripThisZip && shouldStripFile(emulateJar, stripFiles, stripDirs, file.Name) {
Colin Cross0cf45cd2017-10-04 17:04:16 -0700343 continue
Nan Zhangd5998cc2017-09-13 13:17:43 -0700344 }
Colin Cross635acc92017-09-12 22:50:46 -0700345
346 if stripDirEntries && file.FileInfo().IsDir() {
347 continue
348 }
349
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700350 // check for other files or directories destined for the same path
351 dest := file.Name
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700352
353 // make a new entry to add
354 source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700355
Colin Cross635acc92017-09-12 22:50:46 -0700356 if existingSource := addMapping(dest, source); existingSource != nil {
Colin Cross34540312017-09-06 12:52:37 -0700357 // handle duplicates
Colin Cross635acc92017-09-12 22:50:46 -0700358 if existingSource.IsDir() != source.IsDir() {
Colin Cross34540312017-09-06 12:52:37 -0700359 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700360 dest, existingSource, source)
Colin Cross34540312017-09-06 12:52:37 -0700361 }
Colin Crosse909e1e2017-11-22 14:09:40 -0800362 if ignoreDuplicates {
363 continue
364 }
Colin Cross34540312017-09-06 12:52:37 -0700365 if emulateJar &&
366 file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
367 // Skip manifest and module info files that are not from the first input file
368 continue
369 }
Colin Cross635acc92017-09-12 22:50:46 -0700370 if !source.IsDir() {
Nan Zhangd5998cc2017-09-13 13:17:43 -0700371 if emulateJar {
Colin Cross635acc92017-09-12 22:50:46 -0700372 if existingSource.CRC32() != source.CRC32() {
Nan Zhangd5998cc2017-09-13 13:17:43 -0700373 fmt.Fprintf(os.Stdout, "WARNING: Duplicate path %v found in %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700374 dest, existingSource, source)
Nan Zhangd5998cc2017-09-13 13:17:43 -0700375 }
376 } else {
377 return fmt.Errorf("Duplicate path %v found in %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700378 dest, existingSource, source)
Nan Zhangd5998cc2017-09-13 13:17:43 -0700379 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700380 }
381 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700382 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700383 }
384
Colin Cross34540312017-09-06 12:52:37 -0700385 if emulateJar {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700386 jarSort(orderedMappings)
387 } else if sortEntries {
388 alphanumericSort(orderedMappings)
389 }
390
391 for _, entry := range orderedMappings {
Colin Cross635acc92017-09-12 22:50:46 -0700392 if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700393 return err
394 }
395 }
396
397 return nil
398}
399
Nan Zhang5925b0f2017-12-19 15:13:40 -0800400// Sets the given directory and all its ancestor directories as Python packages.
401func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
402 for pkgPath != "" {
403 if _, found := existingPyPkgSet[pkgPath]; !found {
404 existingPyPkgSet[pkgPath] = true
405 *newPyPkgs = append(*newPyPkgs, pkgPath)
406 // Gets its ancestor directory by trimming last slash.
407 pkgPath = pathBeforeLastSlash(pkgPath)
408 } else {
409 break
410 }
411 }
412}
413
// pathBeforeLastSlash returns the directory portion of path, or "" when path
// has no parent directory other than the current directory or the root.
func pathBeforeLastSlash(path string) string {
	switch dir := filepath.Dir(path); dir {
	case ".", "/":
		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
		return ""
	default:
		return dir
	}
}
422
Colin Cross24860652018-07-14 22:19:14 -0700423func shouldStripFile(emulateJar bool, stripFiles, stripDirs []string, name string) bool {
Colin Cross0cf45cd2017-10-04 17:04:16 -0700424 for _, dir := range stripDirs {
425 if strings.HasPrefix(name, dir+"/") {
426 if emulateJar {
427 if name != jar.MetaDir && name != jar.ManifestFile {
428 return true
429 }
430 } else {
431 return true
432 }
433 }
434 }
435 for _, pattern := range stripFiles {
436 if match, err := filepath.Match(pattern, filepath.Base(name)); err != nil {
437 panic(fmt.Errorf("%s: %s", err.Error(), pattern))
438 } else if match {
439 return true
440 }
441 }
442 return false
443}
444
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700445func jarSort(files []fileMapping) {
446 sort.SliceStable(files, func(i, j int) bool {
447 return jar.EntryNamesLess(files[i].dest, files[j].dest)
448 })
449}
450
451func alphanumericSort(files []fileMapping) {
452 sort.SliceStable(files, func(i, j int) bool {
453 return files[i].dest < files[j].dest
454 })
455}