blob: df04358b8827d0fbc74c6efdc6b44c536962cb66 [file] [log] [blame]
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package main
16
17import (
Colin Cross635acc92017-09-12 22:50:46 -070018 "errors"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070019 "flag"
20 "fmt"
Colin Cross635acc92017-09-12 22:50:46 -070021 "hash/crc32"
Nan Zhang5925b0f2017-12-19 15:13:40 -080022 "io/ioutil"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070023 "log"
24 "os"
Nan Zhang13f4cf52017-09-19 18:42:01 -070025 "path/filepath"
Jeff Gaston8bab5f22017-09-01 13:34:28 -070026 "sort"
27 "strings"
28
29 "android/soong/jar"
30 "android/soong/third_party/zip"
31)
32
// fileList is a flag.Value that collects one path per occurrence of its flag.
// Each path is stored in its filepath.Clean form.
type fileList []string

// String implements flag.Value. It always yields a quoted empty string,
// whatever the list currently holds.
func (f *fileList) String() string {
	const placeholder = `""`
	return placeholder
}

// Set implements flag.Value by appending the cleaned form of name to the list.
// It never fails.
func (f *fileList) Set(name string) error {
	cleaned := filepath.Clean(name)
	*f = append(*f, cleaned)
	return nil
}
44
// zipsToNotStripSet is a flag.Value holding the set of input zip paths that
// are exempt from stripping. The map must be non-nil before Set is called.
type zipsToNotStripSet map[string]bool

// String implements flag.Value. It always yields a quoted empty string,
// whatever the set currently holds.
func (s zipsToNotStripSet) String() string {
	const placeholder = `""`
	return placeholder
}

// Set implements flag.Value by recording zipPath, exactly as given, as a
// member of the set. It never fails.
func (s zipsToNotStripSet) Set(zipPath string) error {
	s[zipPath] = true
	return nil
}
56
Jeff Gaston8bab5f22017-09-01 13:34:28 -070057var (
Colin Crosse909e1e2017-11-22 14:09:40 -080058 sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
59 emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
Nan Zhang5925b0f2017-12-19 15:13:40 -080060 emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
Colin Crosse909e1e2017-11-22 14:09:40 -080061 stripDirs fileList
62 stripFiles fileList
63 zipsToNotStrip = make(zipsToNotStripSet)
64 stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
65 manifest = flag.String("m", "", "manifest file to insert in jar")
Nan Zhang5925b0f2017-12-19 15:13:40 -080066 entrypoint = flag.String("e", "", "par entrypoint file to insert in par")
Colin Crosse909e1e2017-11-22 14:09:40 -080067 ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070068)
69
Nan Zhangd5998cc2017-09-13 13:17:43 -070070func init() {
Colin Cross0cf45cd2017-10-04 17:04:16 -070071 flag.Var(&stripDirs, "stripDir", "the prefix of file path to be excluded from the output zip")
72 flag.Var(&stripFiles, "stripFile", "filenames to be excluded from the output zip, accepts wildcards")
73 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
Nan Zhangd5998cc2017-09-13 13:17:43 -070074}
75
Jeff Gaston8bab5f22017-09-01 13:34:28 -070076func main() {
77 flag.Usage = func() {
Nan Zhang5925b0f2017-12-19 15:13:40 -080078 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [-e entrypoint] output [inputs...]")
Jeff Gaston8bab5f22017-09-01 13:34:28 -070079 flag.PrintDefaults()
80 }
81
82 // parse args
83 flag.Parse()
84 args := flag.Args()
Colin Cross5c6ecc12017-10-23 18:12:27 -070085 if len(args) < 1 {
Jeff Gaston8bab5f22017-09-01 13:34:28 -070086 flag.Usage()
87 os.Exit(1)
88 }
89 outputPath := args[0]
90 inputs := args[1:]
91
92 log.SetFlags(log.Lshortfile)
93
94 // make writer
95 output, err := os.Create(outputPath)
96 if err != nil {
97 log.Fatal(err)
98 }
99 defer output.Close()
100 writer := zip.NewWriter(output)
101 defer func() {
102 err := writer.Close()
103 if err != nil {
104 log.Fatal(err)
105 }
106 }()
107
108 // make readers
109 readers := []namedZipReader{}
110 for _, input := range inputs {
111 reader, err := zip.OpenReader(input)
112 if err != nil {
113 log.Fatal(err)
114 }
115 defer reader.Close()
116 namedReader := namedZipReader{path: input, reader: reader}
117 readers = append(readers, namedReader)
118 }
119
Colin Cross635acc92017-09-12 22:50:46 -0700120 if *manifest != "" && !*emulateJar {
121 log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
122 }
123
Nan Zhang5925b0f2017-12-19 15:13:40 -0800124 if *entrypoint != "" && !*emulatePar {
125 log.Fatal(errors.New("must specify -p when specifying a entrypoint via -e"))
126 }
127
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700128 // do merge
Nan Zhang5925b0f2017-12-19 15:13:40 -0800129 err = mergeZips(readers, writer, *manifest, *entrypoint, *sortEntries, *emulateJar, *emulatePar,
130 *stripDirEntries, *ignoreDuplicates)
Colin Cross635acc92017-09-12 22:50:46 -0700131 if err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700132 log.Fatal(err)
133 }
134}
135
136// a namedZipReader reads a .zip file and can say which file it's reading
137type namedZipReader struct {
138 path string
139 reader *zip.ReadCloser
140}
141
// zipEntryPath identifies one file inside one zip archive.
type zipEntryPath struct {
	// zipName is the path of the containing zip file.
	zipName string
	// entryName is the name of the entry within that zip.
	entryName string
}

// String renders the path as the zip name and the entry name separated by a
// single slash.
func (p zipEntryPath) String() string {
	parts := []string{p.zipName, p.entryName}
	return strings.Join(parts, "/")
}
151
Colin Cross635acc92017-09-12 22:50:46 -0700152// a zipEntry is a zipSource that pulls its content from another zip
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700153type zipEntry struct {
154 path zipEntryPath
155 content *zip.File
156}
157
Colin Cross635acc92017-09-12 22:50:46 -0700158func (ze zipEntry) String() string {
159 return ze.path.String()
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700160}
161
Colin Cross635acc92017-09-12 22:50:46 -0700162func (ze zipEntry) IsDir() bool {
163 return ze.content.FileInfo().IsDir()
164}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700165
Colin Cross635acc92017-09-12 22:50:46 -0700166func (ze zipEntry) CRC32() uint32 {
167 return ze.content.FileHeader.CRC32
168}
169
170func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
171 return zw.CopyFrom(ze.content, dest)
172}
173
174// a bufferEntry is a zipSource that pulls its content from a []byte
175type bufferEntry struct {
176 fh *zip.FileHeader
177 content []byte
178}
179
180func (be bufferEntry) String() string {
181 return "internal buffer"
182}
183
184func (be bufferEntry) IsDir() bool {
185 return be.fh.FileInfo().IsDir()
186}
187
188func (be bufferEntry) CRC32() uint32 {
189 return crc32.ChecksumIEEE(be.content)
190}
191
192func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
193 w, err := zw.CreateHeader(be.fh)
194 if err != nil {
195 return err
196 }
197
198 if !be.IsDir() {
199 _, err = w.Write(be.content)
200 if err != nil {
201 return err
202 }
203 }
204
205 return nil
206}
207
208type zipSource interface {
209 String() string
210 IsDir() bool
211 CRC32() uint32
212 WriteToZip(dest string, zw *zip.Writer) error
213}
214
215// a fileMapping specifies to copy a zip entry from one place to another
216type fileMapping struct {
217 dest string
218 source zipSource
219}
220
Nan Zhang5925b0f2017-12-19 15:13:40 -0800221func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, entrypoint string,
222 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool) error {
Colin Cross635acc92017-09-12 22:50:46 -0700223
224 sourceByDest := make(map[string]zipSource, 0)
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700225 orderedMappings := []fileMapping{}
226
Colin Cross635acc92017-09-12 22:50:46 -0700227 // if dest already exists returns a non-null zipSource for the existing source
228 addMapping := func(dest string, source zipSource) zipSource {
229 mapKey := filepath.Clean(dest)
230 if existingSource, exists := sourceByDest[mapKey]; exists {
231 return existingSource
232 }
233
234 sourceByDest[mapKey] = source
235 orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
236 return nil
237 }
238
239 if manifest != "" {
240 if !stripDirEntries {
241 dirHeader := jar.MetaDirFileHeader()
242 dirSource := bufferEntry{dirHeader, nil}
243 addMapping(jar.MetaDir, dirSource)
244 }
245
246 fh, buf, err := jar.ManifestFileContents(manifest)
247 if err != nil {
248 return err
249 }
250
251 fileSource := bufferEntry{fh, buf}
252 addMapping(jar.ManifestFile, fileSource)
253 }
254
Nan Zhang5925b0f2017-12-19 15:13:40 -0800255 if entrypoint != "" {
256 buf, err := ioutil.ReadFile(entrypoint)
257 if err != nil {
258 return err
259 }
260 fh := &zip.FileHeader{
261 Name: "entry_point.txt",
262 Method: zip.Store,
263 UncompressedSize64: uint64(len(buf)),
264 }
265 fh.SetMode(0700)
266 fh.SetModTime(jar.DefaultTime)
267 fileSource := bufferEntry{fh, buf}
268 addMapping("entry_point.txt", fileSource)
269 }
270
271 if emulatePar {
272 // the runfiles packages needs to be populated with "__init__.py".
273 newPyPkgs := []string{}
274 // the runfiles dirs have been treated as packages.
275 existingPyPkgSet := make(map[string]bool)
276 // put existing __init__.py files to a set first. This set is used for preventing
277 // generated __init__.py files from overwriting existing ones.
278 for _, namedReader := range readers {
279 for _, file := range namedReader.reader.File {
280 if filepath.Base(file.Name) != "__init__.py" {
281 continue
282 }
283 pyPkg := pathBeforeLastSlash(file.Name)
284 if _, found := existingPyPkgSet[pyPkg]; found {
285 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
286 } else {
287 existingPyPkgSet[pyPkg] = true
288 }
289 }
290 }
291 for _, namedReader := range readers {
292 for _, file := range namedReader.reader.File {
293 var parentPath string /* the path after trimming last "/" */
294 if filepath.Base(file.Name) == "__init__.py" {
295 // for existing __init__.py files, we should trim last "/" for twice.
296 // eg. a/b/c/__init__.py ---> a/b
297 parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
298 } else {
299 parentPath = pathBeforeLastSlash(file.Name)
300 }
301 populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
302 }
303 }
304 for _, pkg := range newPyPkgs {
305 var emptyBuf []byte
306 fh := &zip.FileHeader{
307 Name: filepath.Join(pkg, "__init__.py"),
308 Method: zip.Store,
309 UncompressedSize64: uint64(len(emptyBuf)),
310 }
311 fh.SetMode(0700)
312 fh.SetModTime(jar.DefaultTime)
313 fileSource := bufferEntry{fh, emptyBuf}
314 addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
315 }
316 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700317 for _, namedReader := range readers {
Nan Zhang13f4cf52017-09-19 18:42:01 -0700318 _, skipStripThisZip := zipsToNotStrip[namedReader.path]
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700319 for _, file := range namedReader.reader.File {
Colin Cross0cf45cd2017-10-04 17:04:16 -0700320 if !skipStripThisZip && shouldStripFile(emulateJar, file.Name) {
321 continue
Nan Zhangd5998cc2017-09-13 13:17:43 -0700322 }
Colin Cross635acc92017-09-12 22:50:46 -0700323
324 if stripDirEntries && file.FileInfo().IsDir() {
325 continue
326 }
327
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700328 // check for other files or directories destined for the same path
329 dest := file.Name
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700330
331 // make a new entry to add
332 source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700333
Colin Cross635acc92017-09-12 22:50:46 -0700334 if existingSource := addMapping(dest, source); existingSource != nil {
Colin Cross34540312017-09-06 12:52:37 -0700335 // handle duplicates
Colin Cross635acc92017-09-12 22:50:46 -0700336 if existingSource.IsDir() != source.IsDir() {
Colin Cross34540312017-09-06 12:52:37 -0700337 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700338 dest, existingSource, source)
Colin Cross34540312017-09-06 12:52:37 -0700339 }
Colin Crosse909e1e2017-11-22 14:09:40 -0800340 if ignoreDuplicates {
341 continue
342 }
Colin Cross34540312017-09-06 12:52:37 -0700343 if emulateJar &&
344 file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
345 // Skip manifest and module info files that are not from the first input file
346 continue
347 }
Colin Cross635acc92017-09-12 22:50:46 -0700348 if !source.IsDir() {
Nan Zhangd5998cc2017-09-13 13:17:43 -0700349 if emulateJar {
Colin Cross635acc92017-09-12 22:50:46 -0700350 if existingSource.CRC32() != source.CRC32() {
Nan Zhangd5998cc2017-09-13 13:17:43 -0700351 fmt.Fprintf(os.Stdout, "WARNING: Duplicate path %v found in %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700352 dest, existingSource, source)
Nan Zhangd5998cc2017-09-13 13:17:43 -0700353 }
354 } else {
355 return fmt.Errorf("Duplicate path %v found in %v and %v\n",
Colin Cross635acc92017-09-12 22:50:46 -0700356 dest, existingSource, source)
Nan Zhangd5998cc2017-09-13 13:17:43 -0700357 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700358 }
359 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700360 }
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700361 }
362
Colin Cross34540312017-09-06 12:52:37 -0700363 if emulateJar {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700364 jarSort(orderedMappings)
365 } else if sortEntries {
366 alphanumericSort(orderedMappings)
367 }
368
369 for _, entry := range orderedMappings {
Colin Cross635acc92017-09-12 22:50:46 -0700370 if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700371 return err
372 }
373 }
374
375 return nil
376}
377
Nan Zhang5925b0f2017-12-19 15:13:40 -0800378// Sets the given directory and all its ancestor directories as Python packages.
379func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
380 for pkgPath != "" {
381 if _, found := existingPyPkgSet[pkgPath]; !found {
382 existingPyPkgSet[pkgPath] = true
383 *newPyPkgs = append(*newPyPkgs, pkgPath)
384 // Gets its ancestor directory by trimming last slash.
385 pkgPath = pathBeforeLastSlash(pkgPath)
386 } else {
387 break
388 }
389 }
390}
391
// pathBeforeLastSlash returns the directory part of path as computed by
// filepath.Dir, or "" when that directory is "." or "/", that is, when path
// has no parent directory other than the current directory or the root.
func pathBeforeLastSlash(path string) string {
	switch dir := filepath.Dir(path); dir {
	case ".", "/":
		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
		return ""
	default:
		return dir
	}
}
400
Colin Cross0cf45cd2017-10-04 17:04:16 -0700401func shouldStripFile(emulateJar bool, name string) bool {
402 for _, dir := range stripDirs {
403 if strings.HasPrefix(name, dir+"/") {
404 if emulateJar {
405 if name != jar.MetaDir && name != jar.ManifestFile {
406 return true
407 }
408 } else {
409 return true
410 }
411 }
412 }
413 for _, pattern := range stripFiles {
414 if match, err := filepath.Match(pattern, filepath.Base(name)); err != nil {
415 panic(fmt.Errorf("%s: %s", err.Error(), pattern))
416 } else if match {
417 return true
418 }
419 }
420 return false
421}
422
Jeff Gaston8bab5f22017-09-01 13:34:28 -0700423func jarSort(files []fileMapping) {
424 sort.SliceStable(files, func(i, j int) bool {
425 return jar.EntryNamesLess(files[i].dest, files[j].dest)
426 })
427}
428
429func alphanumericSort(files []fileMapping) {
430 sort.SliceStable(files, func(i, j int) bool {
431 return files[i].dest < files[j].dest
432 })
433}