Support exclusions and Blueprint-style ** globs in zip2zip

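Jacoco support will use zip2zip to create a jar that is a subset
of another jar, and will need exclusion filters and recursive
globs.  Switch zip2zip from filepath.Match to pathtools.Match,
add a repeatable -x flag that takes an exclusion filespec, and
check each included file against the exclusion list.  Exclusions
are matched against the entry's name in the input zip, not its
output name.

An entry matched by more than one filespec is written only once,
and it is an error for two different input entries to map to the
same output name.  When no filespec is given, every entry in the
input zip is copied, including directory entries.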

Bug: 69629238
Test: zip2zip_test.go
Change-Id: Ibe961b0775987f52f1efa357e1201c3ebb81ca9c
diff --git a/cmd/zip2zip/Android.bp b/cmd/zip2zip/Android.bp
index 6420219..68d8bc7 100644
--- a/cmd/zip2zip/Android.bp
+++ b/cmd/zip2zip/Android.bp
@@ -15,8 +15,9 @@
 blueprint_go_binary {
     name: "zip2zip",
     deps: [
-      "android-archive-zip",
-      "soong-jar",
+        "android-archive-zip",
+        "blueprint-pathtools",
+        "soong-jar",
     ],
     srcs: [
         "zip2zip.go",
diff --git a/cmd/zip2zip/zip2zip.go b/cmd/zip2zip/zip2zip.go
index f48d458..e8ea9b9 100644
--- a/cmd/zip2zip/zip2zip.go
+++ b/cmd/zip2zip/zip2zip.go
@@ -24,6 +24,8 @@
 	"strings"
 	"time"
 
+	"github.com/google/blueprint/pathtools"
+
 	"android/soong/jar"
 	"android/soong/third_party/zip"
 )
@@ -36,8 +38,14 @@
 	setTime   = flag.Bool("t", false, "set timestamps to 2009-01-01 00:00:00")
 
 	staticTime = time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC)
+
+	excludes excludeArgs
 )
 
+func init() {
+	flag.Var(&excludes, "x", "exclude a filespec from the output")
+}
+
 func main() {
 	flag.Usage = func() {
 		fmt.Fprintln(os.Stderr, "usage: zip2zip -i zipfile -o zipfile [-s|-j] [-t] [filespec]...")
@@ -45,15 +53,14 @@
 		fmt.Fprintln(os.Stderr, "  filespec:")
 		fmt.Fprintln(os.Stderr, "    <name>")
 		fmt.Fprintln(os.Stderr, "    <in_name>:<out_name>")
-		fmt.Fprintln(os.Stderr, "    <glob>:<out_dir>/")
+		fmt.Fprintln(os.Stderr, "    <glob>[:<out_dir>]")
 		fmt.Fprintln(os.Stderr, "")
-		fmt.Fprintln(os.Stderr, "<glob> uses the rules at https://golang.org/pkg/path/filepath/#Match")
-		fmt.Fprintln(os.Stderr, "As a special exception, '**' is supported to specify all files in the input zip.")
+		fmt.Fprintln(os.Stderr, "<glob> uses the rules at https://godoc.org/github.com/google/blueprint/pathtools/#Match")
 		fmt.Fprintln(os.Stderr, "")
 		fmt.Fprintln(os.Stderr, "Files will be copied with their existing compression from the input zipfile to")
 		fmt.Fprintln(os.Stderr, "the output zipfile, in the order of filespec arguments.")
 		fmt.Fprintln(os.Stderr, "")
-		fmt.Fprintln(os.Stderr, "If no filepsec is provided all files are copied (equivalent to '**').")
+		fmt.Fprintln(os.Stderr, "If no filepsec is provided all files and directories are copied.")
 	}
 
 	flag.Parse()
@@ -85,7 +92,9 @@
 		}
 	}()
 
-	if err := zip2zip(&reader.Reader, writer, *sortGlobs, *sortJava, *setTime, flag.Args()); err != nil {
+	if err := zip2zip(&reader.Reader, writer, *sortGlobs, *sortJava, *setTime,
+		flag.Args(), excludes); err != nil {
+
 		log.Fatal(err)
 	}
 }
@@ -95,91 +104,126 @@
 	newName string
 }
 
-func zip2zip(reader *zip.Reader, writer *zip.Writer, sortGlobs, sortJava, setTime bool, args []string) error {
-	if len(args) == 0 {
-		// If no filespec is provided, default to copying everything
-		args = []string{"**"}
-	}
-	for _, arg := range args {
-		var input string
-		var output string
+func zip2zip(reader *zip.Reader, writer *zip.Writer, sortOutput, sortJava, setTime bool,
+	includes []string, excludes []string) error {
 
+	matches := []pair{}
+
+	sortMatches := func(matches []pair) {
+		if sortJava {
+			sort.SliceStable(matches, func(i, j int) bool {
+				return jar.EntryNamesLess(matches[i].newName, matches[j].newName)
+			})
+		} else if sortOutput {
+			sort.SliceStable(matches, func(i, j int) bool {
+				return matches[i].newName < matches[j].newName
+			})
+		}
+	}
+
+	for _, include := range includes {
 		// Reserve escaping for future implementation, so make sure no
 		// one is using \ and expecting a certain behavior.
-		if strings.Contains(arg, "\\") {
+		if strings.Contains(include, "\\") {
 			return fmt.Errorf("\\ characters are not currently supported")
 		}
 
-		args := strings.SplitN(arg, ":", 2)
-		input = args[0]
-		if len(args) == 2 {
-			output = args[1]
-		}
+		input, output := includeSplit(include)
 
-		matches := []pair{}
-		if strings.IndexAny(input, "*?[") >= 0 {
-			matchAll := input == "**"
-			if !matchAll && strings.Contains(input, "**") {
-				return fmt.Errorf("** is only supported on its own, not with other characters")
-			}
+		var includeMatches []pair
 
-			for _, file := range reader.File {
-				match := matchAll
-
-				if !match {
-					var err error
-					match, err = filepath.Match(input, file.Name)
-					if err != nil {
-						return err
-					}
-				}
-
-				if match {
-					var newName string
-					if output == "" {
-						newName = file.Name
-					} else {
+		for _, file := range reader.File {
+			var newName string
+			if match, err := pathtools.Match(input, file.Name); err != nil {
+				return err
+			} else if match {
+				if output == "" {
+					newName = file.Name
+				} else {
+					if pathtools.IsGlob(input) {
+						// If the input is a glob then the output is a directory.
 						_, name := filepath.Split(file.Name)
 						newName = filepath.Join(output, name)
+					} else {
+						// Otherwise it is a file.
+						newName = output
 					}
-					matches = append(matches, pair{file, newName})
 				}
-			}
-
-			if sortJava {
-				jarSort(matches)
-			} else if sortGlobs {
-				sort.SliceStable(matches, func(i, j int) bool {
-					return matches[i].newName < matches[j].newName
-				})
-			}
-		} else {
-			if output == "" {
-				output = input
-			}
-			for _, file := range reader.File {
-				if input == file.Name {
-					matches = append(matches, pair{file, output})
-					break
-				}
+				includeMatches = append(includeMatches, pair{file, newName})
 			}
 		}
 
-		for _, match := range matches {
-			if setTime {
-				match.File.SetModTime(staticTime)
-			}
-			if err := writer.CopyFrom(match.File, match.newName); err != nil {
+		sortMatches(includeMatches)
+		matches = append(matches, includeMatches...)
+	}
+
+	if len(includes) == 0 {
+		// implicitly match everything
+		for _, file := range reader.File {
+			matches = append(matches, pair{file, file.Name})
+		}
+		sortMatches(matches)
+	}
+
+	var matchesAfterExcludes []pair
+	seen := make(map[string]*zip.File)
+
+	for _, match := range matches {
+		// Filter out matches whose original file name matches an exclude filter
+		excluded := false
+		for _, exclude := range excludes {
+			if excludeMatch, err := pathtools.Match(exclude, match.File.Name); err != nil {
 				return err
+			} else if excludeMatch {
+				excluded = true
+				break
 			}
 		}
+
+		if excluded {
+			continue
+		}
+
+		// Check for duplicate output names, ignoring ones that come from the same input zip entry.
+		if prev, exists := seen[match.newName]; exists {
+			if prev != match.File {
+				return fmt.Errorf("multiple entries for %q with different contents", match.newName)
+			}
+			continue
+		}
+		seen[match.newName] = match.File
+
+		matchesAfterExcludes = append(matchesAfterExcludes, match)
+	}
+
+	for _, match := range matchesAfterExcludes {
+		if setTime {
+			match.File.SetModTime(staticTime)
+		}
+		if err := writer.CopyFrom(match.File, match.newName); err != nil {
+			return err
+		}
 	}
 
 	return nil
 }
 
-func jarSort(files []pair) {
-	sort.SliceStable(files, func(i, j int) bool {
-		return jar.EntryNamesLess(files[i].newName, files[j].newName)
-	})
+func includeSplit(s string) (string, string) {
+	split := strings.SplitN(s, ":", 2)
+	if len(split) == 2 {
+		return split[0], split[1]
+	} else {
+		return split[0], ""
+	}
+}
+
+type excludeArgs []string
+
+func (e *excludeArgs) String() string {
+	return strings.Join(*e, " ")
+}
+
+func (e *excludeArgs) Set(s string) error {
+	*e = append(*e, s)
+	return nil
 }
diff --git a/cmd/zip2zip/zip2zip_test.go b/cmd/zip2zip/zip2zip_test.go
index 53c8ce2..212ab28 100644
--- a/cmd/zip2zip/zip2zip_test.go
+++ b/cmd/zip2zip/zip2zip_test.go
@@ -30,6 +30,7 @@
 	sortGlobs  bool
 	sortJava   bool
 	args       []string
+	excludes   []string
 
 	outputFiles []string
 	err         error
@@ -41,13 +42,6 @@
 
 		err: fmt.Errorf("\\ characters are not currently supported"),
 	},
-	{
-		name: "unsupported **",
-
-		args: []string{"a/**:b"},
-
-		err: fmt.Errorf("** is only supported on its own, not with other characters"),
-	},
 	{ // This is modelled after the update package build rules in build/make/core/Makefile
 		name: "filter globs",
 
@@ -95,16 +89,19 @@
 		name: "sort all",
 
 		inputFiles: []string{
+			"RADIO/",
 			"RADIO/a",
+			"IMAGES/",
 			"IMAGES/system.img",
 			"IMAGES/b.txt",
 			"IMAGES/recovery.img",
 			"IMAGES/vendor.img",
+			"OTA/",
 			"OTA/b",
 			"OTA/android-info.txt",
 		},
 		sortGlobs: true,
-		args:      []string{"**"},
+		args:      []string{"**/*"},
 
 		outputFiles: []string{
 			"IMAGES/b.txt",
@@ -120,11 +117,14 @@
 		name: "sort all implicit",
 
 		inputFiles: []string{
+			"RADIO/",
 			"RADIO/a",
+			"IMAGES/",
 			"IMAGES/system.img",
 			"IMAGES/b.txt",
 			"IMAGES/recovery.img",
 			"IMAGES/vendor.img",
+			"OTA/",
 			"OTA/b",
 			"OTA/android-info.txt",
 		},
@@ -132,12 +132,15 @@
 		args:      nil,
 
 		outputFiles: []string{
+			"IMAGES/",
 			"IMAGES/b.txt",
 			"IMAGES/recovery.img",
 			"IMAGES/system.img",
 			"IMAGES/vendor.img",
+			"OTA/",
 			"OTA/android-info.txt",
 			"OTA/b",
+			"RADIO/",
 			"RADIO/a",
 		},
 	},
@@ -177,7 +180,7 @@
 			"b",
 			"a",
 		},
-		args: []string{"a:a2", "**"},
+		args: []string{"a:a2", "**/*"},
 
 		outputFiles: []string{
 			"a2",
@@ -185,6 +188,69 @@
 			"a",
 		},
 	},
+	{
+		name: "multiple matches",
+
+		inputFiles: []string{
+			"a/a",
+		},
+		args: []string{"a/a", "a/*"},
+
+		outputFiles: []string{
+			"a/a",
+		},
+	},
+	{
+		name: "multiple conflicting matches",
+
+		inputFiles: []string{
+			"a/a",
+			"a/b",
+		},
+		args: []string{"a/b:a/a", "a/*"},
+
+		err: fmt.Errorf(`multiple entries for "a/a" with different contents`),
+	},
+	{
+		name: "excludes",
+
+		inputFiles: []string{
+			"a/a",
+			"a/b",
+		},
+		args:     nil,
+		excludes: []string{"a/a"},
+
+		outputFiles: []string{
+			"a/b",
+		},
+	},
+	{
+		name: "excludes with include",
+
+		inputFiles: []string{
+			"a/a",
+			"a/b",
+		},
+		args:     []string{"a/*"},
+		excludes: []string{"a/a"},
+
+		outputFiles: []string{
+			"a/b",
+		},
+	},
+	{
+		name: "excludes with glob",
+
+		inputFiles: []string{
+			"a/a",
+			"a/b",
+		},
+		args:     []string{"a/*"},
+		excludes: []string{"a/*"},
+
+		outputFiles: nil,
+	},
 }
 
 func errorString(e error) string {
@@ -216,7 +282,7 @@
 			}
 
 			outputWriter := zip.NewWriter(outputBuf)
-			err = zip2zip(inputReader, outputWriter, testCase.sortGlobs, testCase.sortJava, false, testCase.args)
+			err = zip2zip(inputReader, outputWriter, testCase.sortGlobs, testCase.sortJava, false, testCase.args, testCase.excludes)
 			if errorString(testCase.err) != errorString(err) {
 				t.Fatalf("Unexpected error:\n got: %q\nwant: %q", errorString(err), errorString(testCase.err))
 			}