Support storing SHA256 checksum for files in soong_zip
Add a -sha256 argument to soong_zip that computes the SHA256 checksum of
each file and stores it in the `extra` field of that file's header. The
checksums can then be used by downstream systems that use content addressing.
Bug: 259513199
Test: zip_test.go
Test: soong_zip -o test.zip -D test_dir -sha256
Change-Id: I20e9f424bd0a4604f0dc7cc77bd65f10eb49a163
diff --git a/zip/zip.go b/zip/zip.go
index 955fe68..6f1a8ad 100644
--- a/zip/zip.go
+++ b/zip/zip.go
@@ -17,8 +17,11 @@
import (
"bytes"
"compress/flate"
+ "crypto/sha256"
+ "encoding/binary"
"errors"
"fmt"
+ "hash"
"hash/crc32"
"io"
"io/ioutil"
@@ -38,6 +41,14 @@
"android/soong/third_party/zip"
)
+// Sha256HeaderID is a custom Header ID for the `extra` field in
+// the file header to store the SHA checksum.
+const Sha256HeaderID = 0x4967
+
+// Sha256HeaderSignature is the signature to verify that the extra
+// data block is used to store the SHA checksum.
+const Sha256HeaderSignature = 0x9514
+
// Block size used during parallel compression of a single file.
const parallelBlockSize = 1 * 1024 * 1024 // 1MB
@@ -231,6 +242,8 @@
stderr io.Writer
fs pathtools.FileSystem
+
+ sha256Checksum bool
}
type zipEntry struct {
@@ -257,6 +270,7 @@
WriteIfChanged bool
StoreSymlinks bool
IgnoreMissingFiles bool
+ Sha256Checksum bool
Stderr io.Writer
Filesystem pathtools.FileSystem
@@ -280,6 +294,7 @@
ignoreMissingFiles: args.IgnoreMissingFiles,
stderr: args.Stderr,
fs: args.Filesystem,
+ sha256Checksum: args.Sha256Checksum,
}
if z.fs == nil {
@@ -782,15 +797,17 @@
// this based on actual buffer sizes in RateLimit.
ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
- // Calculate the CRC in the background, since reading the entire
- // file could take a while.
+ // Calculate the CRC and SHA256 in the background, since reading
+ // the entire file could take a while.
//
// We could split this up into chunks as well, but it's faster
// than the compression. Due to the Go Zip API, we also need to
// know the result before we can begin writing the compressed
// data out to the zipfile.
+ //
+ // We calculate SHA256 only if `-sha256` is set.
wg.Add(1)
- go z.crcFile(r, ze, compressChan, wg)
+ go z.checksumFileAsync(r, ze, compressChan, wg)
for start := int64(0); start < fileSize; start += parallelBlockSize {
sr := io.NewSectionReader(r, start, parallelBlockSize)
@@ -829,20 +846,53 @@
return nil
}
-func (z *ZipWriter) crcFile(r io.Reader, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) {
+func (z *ZipWriter) checksumFileAsync(r io.ReadSeeker, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) {
defer wg.Done()
defer z.cpuRateLimiter.Finish()
+ z.checksumFile(r, ze)
+
+ resultChan <- ze
+ close(resultChan)
+}
+
+func (z *ZipWriter) checksumFile(r io.ReadSeeker, ze *zipEntry) {
crc := crc32.NewIEEE()
- _, err := io.Copy(crc, r)
+ writers := []io.Writer{crc}
+
+ var shaHasher hash.Hash
+ if z.sha256Checksum && !ze.fh.Mode().IsDir() {
+ shaHasher = sha256.New()
+ writers = append(writers, shaHasher)
+ }
+
+ w := io.MultiWriter(writers...)
+
+ _, err := io.Copy(w, r)
if err != nil {
z.errors <- err
return
}
ze.fh.CRC32 = crc.Sum32()
- resultChan <- ze
- close(resultChan)
+ if shaHasher != nil {
+ z.appendSHAToExtra(ze, shaHasher.Sum(nil))
+ }
+}
+
+func (z *ZipWriter) appendSHAToExtra(ze *zipEntry, checksum []byte) {
+ // The SHA256 checksum block consists of (integers are little-endian):
+ // - Header ID, equal to Sha256HeaderID (2 bytes)
+ // - Data size, the length of the data block below (2 bytes)
+ // - Data block:
+ // - Signature, equal to Sha256HeaderSignature (2 bytes)
+ // - Data, the SHA256 checksum value
+ var buf []byte
+ buf = binary.LittleEndian.AppendUint16(buf, Sha256HeaderID)
+ buf = binary.LittleEndian.AppendUint16(buf, uint16(len(checksum)+2))
+ buf = binary.LittleEndian.AppendUint16(buf, Sha256HeaderSignature)
+ buf = append(buf, checksum...)
+ ze.fh.Extra = append(ze.fh.Extra, buf...)
}
func (z *ZipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, resultChan chan io.Reader, wg *sync.WaitGroup) {
@@ -894,17 +944,9 @@
}
func (z *ZipWriter) compressWholeFile(ze *zipEntry, r io.ReadSeeker, compressChan chan *zipEntry) {
+ z.checksumFile(r, ze)
- crc := crc32.NewIEEE()
- _, err := io.Copy(crc, r)
- if err != nil {
- z.errors <- err
- return
- }
-
- ze.fh.CRC32 = crc.Sum32()
-
- _, err = r.Seek(0, 0)
+ _, err := r.Seek(0, 0)
if err != nil {
z.errors <- err
return