blob: 1ec22659b13202b3692835c1dd46187ed7b5784a [file] [log] [blame]
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package main
16
17import (
Dan Willemsen017d8932016-08-04 15:43:03 -070018 "bytes"
19 "compress/flate"
Colin Cross2fe66872015-03-30 17:20:39 -070020 "flag"
21 "fmt"
Dan Willemsen017d8932016-08-04 15:43:03 -070022 "hash/crc32"
Colin Cross2fe66872015-03-30 17:20:39 -070023 "io"
24 "io/ioutil"
25 "os"
26 "path/filepath"
Dan Willemsen017d8932016-08-04 15:43:03 -070027 "runtime"
28 "runtime/pprof"
29 "runtime/trace"
Colin Cross2fe66872015-03-30 17:20:39 -070030 "strings"
Dan Willemsen017d8932016-08-04 15:43:03 -070031 "sync"
Colin Cross2fe66872015-03-30 17:20:39 -070032 "time"
Dan Willemsen017d8932016-08-04 15:43:03 -070033
34 "android/soong/third_party/zip"
Colin Cross2fe66872015-03-30 17:20:39 -070035)
36
const (
	// parallelBlockSize is the size of each chunk handed to its own
	// compressor when a single file is deflated in parallel (1MB).
	parallelBlockSize = 1 << 20

	// minParallelFileSize is the smallest file size that is split up for
	// parallel compression. Splitting needs more flate.Writer allocations,
	// since the dictionary can't be changed during Reset.
	minParallelFileSize = 6 * parallelBlockSize

	// windowSize is the size of the ZIP compression window (32KB).
	windowSize = 32 << 10
)
47
// nopCloser turns any io.Writer into an io.WriteCloser whose Close is a
// no-op, for callers that expect a closer but have nothing to finalize.
type nopCloser struct {
	io.Writer
}

// Close implements io.Closer. It always succeeds and does not touch the
// embedded writer.
func (nopCloser) Close() error { return nil }
55
// fileArg is a single -f or -l argument, paired with the relative root
// (-C) that was in effect when the argument was parsed.
type fileArg struct {
	relativeRoot string
	file         string
}

// fileArgs collects repeated -f or -l arguments in the order they were given.
type fileArgs []fileArg
61
62func (l *fileArgs) String() string {
63 return `""`
64}
65
66func (l *fileArgs) Set(s string) error {
67 if *relativeRoot == "" {
68 return fmt.Errorf("must pass -C before -f")
69 }
70
Dan Willemsena59a3bc2016-08-03 17:47:23 -070071 *l = append(*l, fileArg{filepath.Clean(*relativeRoot), s})
Colin Cross2fe66872015-03-30 17:20:39 -070072 return nil
73}
74
75func (l *fileArgs) Get() interface{} {
76 return l
77}
78
79var (
Dan Willemsen47ec28f2016-08-10 16:12:30 -070080 out = flag.String("o", "", "file to write zip file to")
81 manifest = flag.String("m", "", "input jar manifest file name")
82 directories = flag.Bool("d", false, "include directories in zip")
Colin Cross2fe66872015-03-30 17:20:39 -070083 relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument")
Dan Willemsen017d8932016-08-04 15:43:03 -070084 parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use")
85 compLevel = flag.Int("L", 5, "deflate compression level (0-9)")
Colin Cross2fe66872015-03-30 17:20:39 -070086 listFiles fileArgs
87 files fileArgs
Dan Willemsen017d8932016-08-04 15:43:03 -070088
89 cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
90 traceFile = flag.String("trace", "", "write trace to file")
Colin Cross2fe66872015-03-30 17:20:39 -070091)
92
93func init() {
94 flag.Var(&listFiles, "l", "file containing list of .class files")
Dan Willemsen47ec28f2016-08-10 16:12:30 -070095 flag.Var(&files, "f", "file to include in zip")
Colin Cross2fe66872015-03-30 17:20:39 -070096}
97
// usage prints a one-line synopsis followed by the flag defaults to standard
// error and terminates the process with exit status 2.
func usage() {
	const synopsis = "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n"

	fmt.Fprint(os.Stderr, synopsis)
	flag.PrintDefaults()
	os.Exit(2)
}
103
Colin Crosse19c7932015-04-24 15:08:38 -0700104type zipWriter struct {
Colin Cross2fe66872015-03-30 17:20:39 -0700105 time time.Time
106 createdDirs map[string]bool
107 directories bool
Colin Crosse19c7932015-04-24 15:08:38 -0700108
Dan Willemsen017d8932016-08-04 15:43:03 -0700109 errors chan error
110 writeOps chan chan *zipEntry
111
112 rateLimit *RateLimit
113
114 compressorPool sync.Pool
115 compLevel int
116}
117
// zipEntry describes one archive member that is ready to be written.
type zipEntry struct {
	// fh is the header to create the entry with.
	fh *zip.FileHeader

	// futureReaders lists, in order, the delayed io.Readers that will
	// supply the entry's data. It is nil for entries that have no data.
	futureReaders chan chan io.Reader
}
124
125func main() {
126 flag.Parse()
127
Dan Willemsen017d8932016-08-04 15:43:03 -0700128 if *cpuProfile != "" {
129 f, err := os.Create(*cpuProfile)
130 if err != nil {
131 fmt.Fprintln(os.Stderr, err.Error())
132 os.Exit(1)
133 }
134 defer f.Close()
135 pprof.StartCPUProfile(f)
136 defer pprof.StopCPUProfile()
137 }
138
139 if *traceFile != "" {
140 f, err := os.Create(*traceFile)
141 if err != nil {
142 fmt.Fprintln(os.Stderr, err.Error())
143 os.Exit(1)
144 }
145 defer f.Close()
146 err = trace.Start(f)
147 if err != nil {
148 fmt.Fprintln(os.Stderr, err.Error())
149 os.Exit(1)
150 }
151 defer trace.Stop()
152 }
153
Colin Cross2fe66872015-03-30 17:20:39 -0700154 if *out == "" {
155 fmt.Fprintf(os.Stderr, "error: -o is required\n")
156 usage()
157 }
158
Colin Crosse19c7932015-04-24 15:08:38 -0700159 w := &zipWriter{
Dan Willemsen77a6b862016-08-04 20:38:47 -0700160 time: time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC),
Colin Cross2fe66872015-03-30 17:20:39 -0700161 createdDirs: make(map[string]bool),
162 directories: *directories,
Dan Willemsen017d8932016-08-04 15:43:03 -0700163 compLevel: *compLevel,
Colin Cross2fe66872015-03-30 17:20:39 -0700164 }
165
Colin Crosse19c7932015-04-24 15:08:38 -0700166 err := w.write(*out, listFiles, *manifest)
Colin Cross2fe66872015-03-30 17:20:39 -0700167 if err != nil {
168 fmt.Fprintln(os.Stderr, err.Error())
169 os.Exit(1)
170 }
171}
172
Colin Crosse19c7932015-04-24 15:08:38 -0700173func (z *zipWriter) write(out string, listFiles fileArgs, manifest string) error {
Colin Cross2fe66872015-03-30 17:20:39 -0700174 f, err := os.Create(out)
175 if err != nil {
176 return err
177 }
178
179 defer f.Close()
180 defer func() {
181 if err != nil {
182 os.Remove(out)
183 }
184 }()
185
Dan Willemsen017d8932016-08-04 15:43:03 -0700186 z.errors = make(chan error)
187 defer close(z.errors)
Colin Cross2fe66872015-03-30 17:20:39 -0700188
Dan Willemsen017d8932016-08-04 15:43:03 -0700189 // This channel size can be essentially unlimited -- it's used as a fifo
190 // queue decouple the CPU and IO loads. Directories don't require any
191 // compression time, but still cost some IO. Similar with small files that
192 // can be very fast to compress. Some files that are more difficult to
193 // compress won't take a corresponding longer time writing out.
194 //
195 // The optimum size here depends on your CPU and IO characteristics, and
196 // the the layout of your zip file. 1000 was chosen mostly at random as
197 // something that worked reasonably well for a test file.
198 //
199 // The RateLimit object will put the upper bounds on the number of
200 // parallel compressions and outstanding buffers.
201 z.writeOps = make(chan chan *zipEntry, 1000)
202 z.rateLimit = NewRateLimit(*parallelJobs, 0)
203 defer z.rateLimit.Stop()
204
205 go func() {
206 var err error
207 defer close(z.writeOps)
208
209 for _, listFile := range listFiles {
210 err = z.writeListFile(listFile)
211 if err != nil {
212 z.errors <- err
213 return
214 }
215 }
216
217 for _, file := range files {
218 err = z.writeRelFile(file.relativeRoot, file.file)
219 if err != nil {
220 z.errors <- err
221 return
222 }
223 }
224
225 if manifest != "" {
226 err = z.writeFile("META-INF/MANIFEST.MF", manifest)
227 if err != nil {
228 z.errors <- err
229 return
230 }
231 }
232 }()
233
234 zipw := zip.NewWriter(f)
235
236 var currentWriteOpChan chan *zipEntry
237 var currentWriter io.WriteCloser
238 var currentReaders chan chan io.Reader
239 var currentReader chan io.Reader
240 var done bool
241
242 for !done {
243 var writeOpsChan chan chan *zipEntry
244 var writeOpChan chan *zipEntry
245 var readersChan chan chan io.Reader
246
247 if currentReader != nil {
248 // Only read and process errors
249 } else if currentReaders != nil {
250 readersChan = currentReaders
251 } else if currentWriteOpChan != nil {
252 writeOpChan = currentWriteOpChan
253 } else {
254 writeOpsChan = z.writeOps
255 }
256
257 select {
258 case writeOp, ok := <-writeOpsChan:
259 if !ok {
260 done = true
261 }
262
263 currentWriteOpChan = writeOp
264
265 case op := <-writeOpChan:
266 currentWriteOpChan = nil
267
268 if op.fh.Method == zip.Deflate {
269 currentWriter, err = zipw.CreateCompressedHeader(op.fh)
270 } else {
271 var zw io.Writer
272 zw, err = zipw.CreateHeader(op.fh)
273 currentWriter = nopCloser{zw}
274 }
275 if err != nil {
276 return err
277 }
278
279 currentReaders = op.futureReaders
280 if op.futureReaders == nil {
281 currentWriter.Close()
282 currentWriter = nil
283 }
284
285 case futureReader, ok := <-readersChan:
286 if !ok {
287 // Done with reading
288 currentWriter.Close()
289 currentWriter = nil
290 currentReaders = nil
291 }
292
293 currentReader = futureReader
294
295 case reader := <-currentReader:
296 var count int64
297 count, err = io.Copy(currentWriter, reader)
298 if err != nil {
299 return err
300 }
301 z.rateLimit.Release(int(count))
302
303 currentReader = nil
304
305 case err = <-z.errors:
Colin Cross2fe66872015-03-30 17:20:39 -0700306 return err
307 }
308 }
309
Dan Willemsen017d8932016-08-04 15:43:03 -0700310 // One last chance to catch an error
311 select {
312 case err = <-z.errors:
313 return err
314 default:
315 zipw.Close()
316 return nil
Colin Cross2fe66872015-03-30 17:20:39 -0700317 }
Colin Cross2fe66872015-03-30 17:20:39 -0700318}
319
Colin Crosse19c7932015-04-24 15:08:38 -0700320func (z *zipWriter) writeListFile(listFile fileArg) error {
Colin Cross2fe66872015-03-30 17:20:39 -0700321 list, err := ioutil.ReadFile(listFile.file)
322 if err != nil {
323 return err
324 }
325
326 files := strings.Split(string(list), "\n")
327
328 for _, file := range files {
329 file = strings.TrimSpace(file)
330 if file == "" {
331 continue
332 }
Colin Crosse19c7932015-04-24 15:08:38 -0700333 err = z.writeRelFile(listFile.relativeRoot, file)
Colin Cross2fe66872015-03-30 17:20:39 -0700334 if err != nil {
335 return err
336 }
337 }
338
339 return nil
340}
341
Colin Crosse19c7932015-04-24 15:08:38 -0700342func (z *zipWriter) writeRelFile(root, file string) error {
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700343 file = filepath.Clean(file)
344
Colin Cross2fe66872015-03-30 17:20:39 -0700345 rel, err := filepath.Rel(root, file)
346 if err != nil {
347 return err
348 }
349
Colin Crosse19c7932015-04-24 15:08:38 -0700350 err = z.writeFile(rel, file)
Colin Cross2fe66872015-03-30 17:20:39 -0700351 if err != nil {
352 return err
353 }
354
355 return nil
356}
357
Colin Crosse19c7932015-04-24 15:08:38 -0700358func (z *zipWriter) writeFile(rel, file string) error {
Dan Willemsen017d8932016-08-04 15:43:03 -0700359 var fileSize int64
Dan Willemsen10462b32017-03-15 19:02:51 -0700360 var executable bool
Dan Willemsen017d8932016-08-04 15:43:03 -0700361
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700362 if s, err := os.Lstat(file); err != nil {
363 return err
364 } else if s.IsDir() {
Colin Cross957cc4e2015-04-24 15:10:32 -0700365 if z.directories {
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700366 return z.writeDirectory(rel)
Colin Cross957cc4e2015-04-24 15:10:32 -0700367 }
368 return nil
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700369 } else if s.Mode()&os.ModeSymlink != 0 {
370 return z.writeSymlink(rel, file)
371 } else if !s.Mode().IsRegular() {
372 return fmt.Errorf("%s is not a file, directory, or symlink", file)
Dan Willemsen017d8932016-08-04 15:43:03 -0700373 } else {
374 fileSize = s.Size()
Dan Willemsen10462b32017-03-15 19:02:51 -0700375 executable = s.Mode()&0100 != 0
Colin Cross957cc4e2015-04-24 15:10:32 -0700376 }
377
Colin Crosse19c7932015-04-24 15:08:38 -0700378 if z.directories {
Colin Cross2fe66872015-03-30 17:20:39 -0700379 dir, _ := filepath.Split(rel)
Colin Crosse19c7932015-04-24 15:08:38 -0700380 err := z.writeDirectory(dir)
381 if err != nil {
382 return err
Colin Cross2fe66872015-03-30 17:20:39 -0700383 }
384 }
385
Dan Willemsen017d8932016-08-04 15:43:03 -0700386 compressChan := make(chan *zipEntry, 1)
387 z.writeOps <- compressChan
388
389 // Pre-fill a zipEntry, it will be sent in the compressChan once
390 // we're sure about the Method and CRC.
391 ze := &zipEntry{
392 fh: &zip.FileHeader{
393 Name: rel,
394 Method: zip.Deflate,
395
396 UncompressedSize64: uint64(fileSize),
397 },
398 }
399 ze.fh.SetModTime(z.time)
Dan Willemsen10462b32017-03-15 19:02:51 -0700400 if executable {
401 ze.fh.SetMode(0700)
402 }
Dan Willemsen017d8932016-08-04 15:43:03 -0700403
404 r, err := os.Open(file)
405 if err != nil {
406 return err
407 }
408
409 exec := z.rateLimit.RequestExecution()
410
411 if fileSize >= minParallelFileSize {
412 wg := new(sync.WaitGroup)
413
414 // Allocate enough buffer to hold all readers. We'll limit
415 // this based on actual buffer sizes in RateLimit.
416 ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
417
418 // Calculate the CRC in the background, since reading the entire
419 // file could take a while.
420 //
421 // We could split this up into chuncks as well, but it's faster
422 // than the compression. Due to the Go Zip API, we also need to
423 // know the result before we can begin writing the compressed
424 // data out to the zipfile.
425 wg.Add(1)
426 go z.crcFile(r, ze, exec, compressChan, wg)
427
428 for start := int64(0); start < fileSize; start += parallelBlockSize {
429 sr := io.NewSectionReader(r, start, parallelBlockSize)
430 resultChan := make(chan io.Reader, 1)
431 ze.futureReaders <- resultChan
432
433 exec := z.rateLimit.RequestExecution()
434
435 last := !(start+parallelBlockSize < fileSize)
436 var dict []byte
437 if start >= windowSize {
438 dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
439 }
440
441 wg.Add(1)
442 go z.compressPartialFile(sr, dict, last, exec, resultChan, wg)
443 }
444
445 close(ze.futureReaders)
446
447 // Close the file handle after all readers are done
448 go func(wg *sync.WaitGroup, f *os.File) {
449 wg.Wait()
450 f.Close()
451 }(wg, r)
452 } else {
Dan Willemsena8b55022017-03-15 21:49:26 -0700453 go z.compressWholeFile(ze, r, exec, compressChan)
Dan Willemsen017d8932016-08-04 15:43:03 -0700454 }
455
456 return nil
457}
458
459func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) {
460 defer wg.Done()
461 defer exec.Finish(0)
462
463 crc := crc32.NewIEEE()
464 _, err := io.Copy(crc, r)
465 if err != nil {
466 z.errors <- err
467 return
468 }
469
470 ze.fh.CRC32 = crc.Sum32()
471 resultChan <- ze
472 close(resultChan)
473}
474
475func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) {
476 defer wg.Done()
477
478 result, err := z.compressBlock(r, dict, last)
479 if err != nil {
480 z.errors <- err
481 return
482 }
483
484 exec.Finish(result.Len())
485 resultChan <- result
486}
487
488func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
489 buf := new(bytes.Buffer)
490 var fw *flate.Writer
491 var err error
492 if len(dict) > 0 {
493 // There's no way to Reset a Writer with a new dictionary, so
494 // don't use the Pool
495 fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
496 } else {
497 var ok bool
498 if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
499 fw.Reset(buf)
500 } else {
501 fw, err = flate.NewWriter(buf, z.compLevel)
502 }
503 defer z.compressorPool.Put(fw)
504 }
505 if err != nil {
506 return nil, err
507 }
508
509 _, err = io.Copy(fw, r)
510 if err != nil {
511 return nil, err
512 }
513 if last {
514 fw.Close()
515 } else {
516 fw.Flush()
517 }
518
519 return buf, nil
520}
521
Dan Willemsena8b55022017-03-15 21:49:26 -0700522func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) {
Dan Willemsen017d8932016-08-04 15:43:03 -0700523 var bufSize int
524
525 defer r.Close()
526
Dan Willemsen017d8932016-08-04 15:43:03 -0700527 crc := crc32.NewIEEE()
Dan Willemsena8b55022017-03-15 21:49:26 -0700528 _, err := io.Copy(crc, r)
Colin Cross2fe66872015-03-30 17:20:39 -0700529 if err != nil {
Dan Willemsen017d8932016-08-04 15:43:03 -0700530 z.errors <- err
531 return
Colin Cross2fe66872015-03-30 17:20:39 -0700532 }
533
Dan Willemsena8b55022017-03-15 21:49:26 -0700534 ze.fh.CRC32 = crc.Sum32()
Colin Cross2fe66872015-03-30 17:20:39 -0700535
Dan Willemsen017d8932016-08-04 15:43:03 -0700536 _, err = r.Seek(0, 0)
Colin Cross2fe66872015-03-30 17:20:39 -0700537 if err != nil {
Dan Willemsen017d8932016-08-04 15:43:03 -0700538 z.errors <- err
539 return
Colin Cross2fe66872015-03-30 17:20:39 -0700540 }
541
Dan Willemsen017d8932016-08-04 15:43:03 -0700542 compressed, err := z.compressBlock(r, nil, true)
543
Dan Willemsena8b55022017-03-15 21:49:26 -0700544 ze.futureReaders = make(chan chan io.Reader, 1)
Dan Willemsen017d8932016-08-04 15:43:03 -0700545 futureReader := make(chan io.Reader, 1)
546 ze.futureReaders <- futureReader
547 close(ze.futureReaders)
548
549 if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
550 futureReader <- compressed
551 bufSize = compressed.Len()
552 } else {
553 _, err = r.Seek(0, 0)
554 if err != nil {
555 z.errors <- err
556 return
557 }
558
559 buf, err := ioutil.ReadAll(r)
560 if err != nil {
561 z.errors <- err
562 return
563 }
564
565 ze.fh.Method = zip.Store
566 futureReader <- bytes.NewReader(buf)
567 bufSize = int(ze.fh.UncompressedSize64)
568 }
569 exec.Finish(bufSize)
570 close(futureReader)
571
572 compressChan <- ze
573 close(compressChan)
Colin Cross2fe66872015-03-30 17:20:39 -0700574}
Colin Crosse19c7932015-04-24 15:08:38 -0700575
576func (z *zipWriter) writeDirectory(dir string) error {
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700577 if dir != "" && !strings.HasSuffix(dir, "/") {
578 dir = dir + "/"
579 }
580
581 for dir != "" && dir != "./" && !z.createdDirs[dir] {
Colin Crosse19c7932015-04-24 15:08:38 -0700582 z.createdDirs[dir] = true
583
584 dirHeader := &zip.FileHeader{
585 Name: dir,
586 }
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700587 dirHeader.SetMode(0700 | os.ModeDir)
Colin Crosse19c7932015-04-24 15:08:38 -0700588 dirHeader.SetModTime(z.time)
589
Dan Willemsen017d8932016-08-04 15:43:03 -0700590 ze := make(chan *zipEntry, 1)
591 ze <- &zipEntry{
592 fh: dirHeader,
Colin Crosse19c7932015-04-24 15:08:38 -0700593 }
Dan Willemsen017d8932016-08-04 15:43:03 -0700594 close(ze)
595 z.writeOps <- ze
Colin Crosse19c7932015-04-24 15:08:38 -0700596
597 dir, _ = filepath.Split(dir)
598 }
599
600 return nil
601}
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700602
603func (z *zipWriter) writeSymlink(rel, file string) error {
604 if z.directories {
605 dir, _ := filepath.Split(rel)
606 if err := z.writeDirectory(dir); err != nil {
607 return err
608 }
609 }
610
611 fileHeader := &zip.FileHeader{
612 Name: rel,
613 }
614 fileHeader.SetModTime(z.time)
615 fileHeader.SetMode(0700 | os.ModeSymlink)
616
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700617 dest, err := os.Readlink(file)
618 if err != nil {
619 return err
620 }
621
Dan Willemsen017d8932016-08-04 15:43:03 -0700622 ze := make(chan *zipEntry, 1)
623 futureReaders := make(chan chan io.Reader, 1)
624 futureReader := make(chan io.Reader, 1)
625 futureReaders <- futureReader
626 close(futureReaders)
627 futureReader <- bytes.NewBufferString(dest)
628 close(futureReader)
629
630 // We didn't ask permission to execute, since this should be very short
631 // but we still need to increment the outstanding buffer sizes, since
632 // the read will decrement the buffer size.
633 z.rateLimit.Release(-len(dest))
634
635 ze <- &zipEntry{
636 fh: fileHeader,
637 futureReaders: futureReaders,
638 }
639 close(ze)
640 z.writeOps <- ze
641
642 return nil
Dan Willemsena59a3bc2016-08-03 17:47:23 -0700643}