Support moving sources in srcjars in soong_zip

Add a -srcjar argument to soong_zip that causes it to read the
package statement of each .java file and use that to place the
source file at a path that matches the package.

Test: jar_test.go, zip_test.go
Change-Id: I36017e42445ba3b0a82a10a8d81e8ac0cca096f2
diff --git a/jar/Android.bp b/jar/Android.bp
index 6c2e60e..2563474 100644
--- a/jar/Android.bp
+++ b/jar/Android.bp
@@ -18,8 +18,10 @@
     srcs: [
         "jar.go",
     ],
+    testSrcs: [
+        "jar_test.go",
+    ],
     deps: [
         "android-archive-zip",
     ],
 }
-
diff --git a/jar/jar.go b/jar/jar.go
index fa0e693..a8f06a4 100644
--- a/jar/jar.go
+++ b/jar/jar.go
@@ -17,9 +17,12 @@
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"os"
 	"strings"
+	"text/scanner"
 	"time"
+	"unicode"
 
 	"android/soong/third_party/zip"
 )
@@ -112,3 +115,111 @@
 
 	return finalBytes, nil
 }
+
+var javaIgnorableIdentifier = &unicode.RangeTable{ // control-character ranges that javaIdentRune accepts only after the first rune
+	R16: []unicode.Range16{
+		{0x00, 0x08, 1}, // U+0000..U+0008
+		{0x0e, 0x1b, 1}, // U+000E..U+001B
+		{0x7f, 0x9f, 1}, // U+007F..U+009F
+	},
+	LatinOffset: 3, // number of R16 entries with Hi <= unicode.MaxLatin1 (all three here)
+}
+
+func javaIdentRune(ch rune, i int) bool { // reports whether ch is accepted as the rune at index i of an identifier
+	if unicode.IsLetter(ch) {
+		return true
+	}
+	if unicode.IsDigit(ch) && i > 0 { // digits are rejected at index 0
+		return true
+	}
+	// The categories below are accepted at every index, including 0.
+	if unicode.In(ch,
+		unicode.Nl, // letter number
+		unicode.Sc, // currency symbol
+		unicode.Pc, // connecting punctuation
+	) {
+		return true
+	}
+	// The categories and table below are accepted only after index 0.
+	if unicode.In(ch,
+		unicode.Cf, // format
+		unicode.Mc, // spacing combining mark
+		unicode.Mn, // non-spacing mark
+		javaIgnorableIdentifier,
+	) && i > 0 {
+		return true
+	}
+
+	return false
+}
+
+// JavaPackage returns the dotted package name declared by the Java source read from r, or "" if the first token is an
+// import, type keyword, modifier, module keyword, annotation or EOF; other input is an error. src labels error messages.
+func JavaPackage(r io.Reader, src string) (string, error) {
+	var s scanner.Scanner
+	var sErr error // most recent error reported through s.Error
+
+	s.Init(r)
+	s.Filename = src
+	s.Error = func(s *scanner.Scanner, msg string) {
+		sErr = fmt.Errorf("error parsing %q: %s", src, msg)
+	}
+	s.IsIdentRune = javaIdentRune
+
+	tok := s.Scan() // first non-comment token (s.Init selects scanner.GoTokens, which skips comments)
+	if sErr != nil {
+		return "", sErr
+	}
+	if tok == scanner.Ident {
+		switch s.TokenText() {
+		case "package":
+		// Fall out of the switch; the package name is parsed by the loop below.
+		case "import":
+			// File has no package statement, first keyword is an import
+			return "", nil
+		case "class", "enum", "interface":
+			// File has no package statement, first keyword is a type declaration
+			return "", nil
+		case "public", "protected", "private", "abstract", "static", "final", "strictfp":
+			// File has no package statement, first keyword is a modifier
+			return "", nil
+		case "module", "open":
+			// File has no package statement, first keyword is a module declaration
+			return "", nil
+		default:
+			return "", fmt.Errorf(`expected first token of java file to be "package", got %q`, s.TokenText())
+		}
+	} else if tok == '@' {
+		// File has no package statement, first token is an annotation
+		return "", nil
+	} else if tok == scanner.EOF {
+		// File has no package statement, it has no non-whitespace non-comment tokens
+		return "", nil
+	} else {
+		return "", fmt.Errorf(`expected first token of java file to be "package", got %q`, s.TokenText())
+	}
+
+	var pkg string // package name accumulated so far, including any trailing dot
+	for { // each iteration consumes one identifier followed by either '.' or ';'
+		tok = s.Scan()
+		if sErr != nil {
+			return "", sErr
+		}
+		if tok != scanner.Ident { // every dot-separated component must be an identifier
+			return "", fmt.Errorf(`expected "package <package>;", got "package %s%s"`, pkg, s.TokenText())
+		}
+		pkg += s.TokenText()
+
+		tok = s.Scan()
+		if sErr != nil {
+			return "", sErr
+		}
+		if tok == ';' { // end of the package statement
+			return pkg, nil
+		} else if tok == '.' {
+			pkg += "."
+		} else {
+			return "", fmt.Errorf(`expected "package <package>;", got "package %s%s"`, pkg, s.TokenText())
+		}
+	}
+}
diff --git a/jar/jar_test.go b/jar/jar_test.go
new file mode 100644
index 0000000..c92011e
--- /dev/null
+++ b/jar/jar_test.go
@@ -0,0 +1,182 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jar
+
+import (
+	"bytes"
+	"io"
+	"testing"
+)
+
+func TestGetJavaPackage(t *testing.T) { // table-driven test of JavaPackage's returned package and error presence
+	type args struct { // NOTE(review): args is never referenced in this test
+		r   io.Reader
+		src string
+	}
+	tests := []struct {
+		name    string
+		in      string
+		want    string
+		wantErr bool
+	}{
+		{
+			name: "simple",
+			in:   "package foo.bar;",
+			want: "foo.bar",
+		},
+		{
+			name: "comment",
+			in:   "/* test */\npackage foo.bar;",
+			want: "foo.bar",
+		},
+		{
+			name: "no package",
+			in:   "import foo.bar;",
+			want: "",
+		},
+		{
+			name:    "missing semicolon error",
+			in:      "package foo.bar",
+			wantErr: true,
+		},
+		{
+			name:    "parser error",
+			in:      "/*", // unterminated comment
+			wantErr: true,
+		},
+		{
+			name:    "parser ident error",
+			in:      "package 0foo.bar;", // first component starts with a digit
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			buf := bytes.NewBufferString(tt.in)
+			got, err := JavaPackage(buf, "<test>")
+			if (err != nil) != tt.wantErr { // only the presence of an error is compared, not its text
+				t.Errorf("JavaPackage() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("JavaPackage() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_javaIdentRune(t *testing.T) { // checks javaIdentRune at index 0 ("first") and index 1 ("part") for three groups of runes
+	// runes that should be valid anywhere in an identifier
+	validAnywhere := []rune{
+		// letters, $, _
+		'a',
+		'A',
+		'$',
+		'_',
+
+		// assorted unicode
+		'𐐀',
+		'𐐨',
+		'Dž',
+		'ῼ',
+		'ʰ',
+		'゚',
+		'ƻ',
+		'㡢',
+		'₩',
+		'_',
+		'Ⅰ',
+		'𐍊',
+	}
+
+	// runes that should be invalid as the first rune in an identifier, but valid anywhere else
+	validAfterFirst := []rune{
+		// digits
+		'0',
+
+		// assorted unicode
+		'᥍',
+		'𝟎',
+		'ྂ',
+		'𝆀',
+
+		// control characters
+		'\x00',
+		'\b',
+		'\u000e',
+		'\u001b',
+		'\u007f',
+		'\u009f',
+		'\u00ad',
+		0xE007F,
+
+		// zero width space
+		'\u200b',
+	}
+
+	// runes that should never be valid in an identifier
+	invalid := []rune{
+		';',
+		0x110000, // one past unicode.MaxRune
+	}
+	// "First" sets are checked at index 0 and "part" sets at index 1 by the subtests below.
+	validFirst := validAnywhere
+	invalidFirst := append(validAfterFirst, invalid...)
+	validPart := append(validAnywhere, validAfterFirst...)
+	invalidPart := invalid
+
+	check := func(t *testing.T, ch rune, i int, want bool) { // fails the subtest when javaIdentRune(ch, i) != want
+		t.Helper()
+		if got := javaIdentRune(ch, i); got != want {
+			t.Errorf("javaIdentRune() = %v, want %v", got, want)
+		}
+	}
+
+	t.Run("first", func(t *testing.T) {
+		t.Run("valid", func(t *testing.T) {
+			for _, ch := range validFirst {
+				t.Run(string(ch), func(t *testing.T) {
+					check(t, ch, 0, true)
+				})
+			}
+		})
+
+		t.Run("invalid", func(t *testing.T) {
+			for _, ch := range invalidFirst {
+				t.Run(string(ch), func(t *testing.T) {
+					check(t, ch, 0, false)
+				})
+			}
+		})
+	})
+
+	t.Run("part", func(t *testing.T) {
+		t.Run("valid", func(t *testing.T) {
+			for _, ch := range validPart {
+				t.Run(string(ch), func(t *testing.T) {
+					check(t, ch, 1, true)
+				})
+			}
+		})
+
+		t.Run("invalid", func(t *testing.T) {
+			for _, ch := range invalidPart {
+				t.Run(string(ch), func(t *testing.T) {
+					check(t, ch, 1, false)
+				})
+			}
+		})
+	})
+}