Fix empty item handling in Split function, rewrite Words function

Test: treehugger
Bug: 172923994
Change-Id: Ic7ee7b1af6e1438df5cf06754b9bec7038b624f2
diff --git a/androidmk/parser/make_strings.go b/androidmk/parser/make_strings.go
index 4b782a2..3c4815e 100644
--- a/androidmk/parser/make_strings.go
+++ b/androidmk/parser/make_strings.go
@@ -15,8 +15,10 @@
 package parser
 
 import (
+	"fmt"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 )
 
 // A MakeString is a string that may contain variable substitutions in it.
@@ -130,8 +132,85 @@
 	})
 }
 
+// Words splits MakeString into multiple makeStrings separated by whitespace.
+// Thus, " a $(X)b  c " will be split into ["a", "$(X)b", "c"].
+// Splitting a MakeString consisting solely of whitespace yields empty array.
 func (ms *MakeString) Words() []*MakeString {
-	return ms.splitNFunc(-1, splitWords)
+	var ch rune    // current character
+	const EOF = -1 // no more characters
+	const EOS = -2 // at the end of a string chunk
+
+	// Next character's chunk and position
+	iString := 0
+	iChar := 0
+
+	var words []*MakeString
+	word := SimpleMakeString("", ms.Pos())
+
+	nextChar := func() {
+		if iString >= len(ms.Strings) {
+			ch = EOF
+		} else if iChar >= len(ms.Strings[iString]) {
+			iString++
+			iChar = 0
+			ch = EOS
+		} else {
+			var w int
+			ch, w = utf8.DecodeRuneInString(ms.Strings[iString][iChar:])
+			iChar += w
+		}
+	}
+
+	appendVariableAndAdvance := func() {
+		if iString-1 < len(ms.Variables) {
+			word.appendVariable(ms.Variables[iString-1])
+		}
+		nextChar()
+	}
+
+	appendCharAndAdvance := func(c rune) {
+		if c != EOF {
+			word.appendString(string(c))
+		}
+		nextChar()
+	}
+
+	nextChar()
+	for ch != EOF {
+		// Skip whitespace
+		for ch == ' ' || ch == '\t' {
+			nextChar()
+		}
+		if ch == EOS {
+			// "... $(X)... " case. The current word should be empty.
+			if !word.Empty() {
+				panic(fmt.Errorf("%q: EOS while current word %q is not empty, iString=%d",
+					ms.Dump(), word.Dump(), iString))
+			}
+			appendVariableAndAdvance()
+		}
+		// Copy word
+		for ch != EOF {
+			if ch == ' ' || ch == '\t' {
+				words = append(words, word)
+				word = SimpleMakeString("", ms.Pos())
+				break
+			}
+			if ch == EOS {
+				// "...a$(X)..." case. Append variable to the current word
+				appendVariableAndAdvance()
+			} else {
+				if ch == '\\' {
+					appendCharAndAdvance('\\')
+				}
+				appendCharAndAdvance(ch)
+			}
+		}
+	}
+	if !word.Empty() {
+		words = append(words, word)
+	}
+	return words
 }
 
 func (ms *MakeString) splitNFunc(n int, splitFunc func(s string, n int) []string) []*MakeString {
@@ -166,9 +245,7 @@
 		}
 	}
 
-	if !curMs.Empty() {
-		ret = append(ret, curMs)
-	}
+	ret = append(ret, curMs)
 	return ret
 }
 
@@ -219,44 +296,6 @@
 	return ret
 }
 
-func splitWords(s string, n int) []string {
-	ret := []string{}
-	preserve := ""
-	for n == -1 || n > 1 {
-		index := strings.IndexAny(s, " \t")
-		if index == 0 && len(preserve) == 0 {
-			s = s[1:]
-		} else if index >= 0 {
-			escapeCount := 0
-			for i := index - 1; i >= 0; i-- {
-				if s[i] != '\\' {
-					break
-				}
-				escapeCount += 1
-			}
-
-			if escapeCount%2 == 1 {
-				preserve += s[0 : index+1]
-				s = s[index+1:]
-				continue
-			}
-
-			ret = append(ret, preserve+s[0:index])
-			s = s[index+1:]
-			preserve = ""
-			if n > 0 {
-				n--
-			}
-		} else {
-			break
-		}
-	}
-	if preserve != "" || s != "" || len(ret) == 0 {
-		ret = append(ret, preserve+s)
-	}
-	return ret
-}
-
 func unescape(s string) string {
 	ret := ""
 	for {
diff --git a/androidmk/parser/make_strings_test.go b/androidmk/parser/make_strings_test.go
index 6995e89..fbb289b 100644
--- a/androidmk/parser/make_strings_test.go
+++ b/androidmk/parser/make_strings_test.go
@@ -26,64 +26,53 @@
 	n        int
 }{
 	{
-		in: &MakeString{
-			Strings: []string{
-				"a b c",
-				"d e f",
-				" h i j",
-			},
-			Variables: []Variable{
-				Variable{Name: SimpleMakeString("var1", NoPos)},
-				Variable{Name: SimpleMakeString("var2", NoPos)},
-			},
-		},
+		// "a b c$(var1)d e f$(var2) h i j"
+		in:  genMakeString("a b c", "var1", "d e f", "var2", " h i j"),
 		sep: " ",
 		n:   -1,
 		expected: []*MakeString{
-			SimpleMakeString("a", NoPos),
-			SimpleMakeString("b", NoPos),
-			&MakeString{
-				Strings: []string{"c", "d"},
-				Variables: []Variable{
-					Variable{Name: SimpleMakeString("var1", NoPos)},
-				},
-			},
-			SimpleMakeString("e", NoPos),
-			&MakeString{
-				Strings: []string{"f", ""},
-				Variables: []Variable{
-					Variable{Name: SimpleMakeString("var2", NoPos)},
-				},
-			},
-			SimpleMakeString("h", NoPos),
-			SimpleMakeString("i", NoPos),
-			SimpleMakeString("j", NoPos),
+			genMakeString("a"),
+			genMakeString("b"),
+			genMakeString("c", "var1", "d"),
+			genMakeString("e"),
+			genMakeString("f", "var2", ""),
+			genMakeString("h"),
+			genMakeString("i"),
+			genMakeString("j"),
 		},
 	},
 	{
-		in: &MakeString{
-			Strings: []string{
-				"a b c",
-				"d e f",
-				" h i j",
-			},
-			Variables: []Variable{
-				Variable{Name: SimpleMakeString("var1", NoPos)},
-				Variable{Name: SimpleMakeString("var2", NoPos)},
-			},
-		},
+		// "a b c$(var1)d e f$(var2) h i j"
+		in:  genMakeString("a b c", "var1", "d e f", "var2", " h i j"),
 		sep: " ",
 		n:   3,
 		expected: []*MakeString{
-			SimpleMakeString("a", NoPos),
-			SimpleMakeString("b", NoPos),
-			&MakeString{
-				Strings: []string{"c", "d e f", " h i j"},
-				Variables: []Variable{
-					Variable{Name: SimpleMakeString("var1", NoPos)},
-					Variable{Name: SimpleMakeString("var2", NoPos)},
-				},
-			},
+			genMakeString("a"),
+			genMakeString("b"),
+			genMakeString("c", "var1", "d e f", "var2", " h i j"),
+		},
+	},
+	{
+		// "$(var1) $(var2)"
+		in:  genMakeString("", "var1", " ", "var2", ""),
+		sep: " ",
+		n:   -1,
+		expected: []*MakeString{
+			genMakeString("", "var1", ""),
+			genMakeString("", "var2", ""),
+		},
+	},
+	{
+		// "a,,b,c,"
+		in:  genMakeString("a,,b,c,"),
+		sep: ",",
+		n:   -1,
+		expected: []*MakeString{
+			genMakeString("a"),
+			genMakeString(""),
+			genMakeString("b"),
+			genMakeString("c"),
+			genMakeString(""),
 		},
 	},
 }
@@ -104,15 +93,15 @@
 	expected string
 }{
 	{
-		in:       SimpleMakeString("a b", NoPos),
+		in:       genMakeString("a b"),
 		expected: "a b",
 	},
 	{
-		in:       SimpleMakeString("a\\ \\\tb\\\\", NoPos),
+		in:       genMakeString("a\\ \\\tb\\\\"),
 		expected: "a \tb\\",
 	},
 	{
-		in:       SimpleMakeString("a\\b\\", NoPos),
+		in:       genMakeString("a\\b\\"),
 		expected: "a\\b\\",
 	},
 }
@@ -131,31 +120,88 @@
 	expected []*MakeString
 }{
 	{
-		in:       SimpleMakeString("", NoPos),
+		in:       genMakeString(""),
 		expected: []*MakeString{},
 	},
 	{
-		in: SimpleMakeString(" a b\\ c d", NoPos),
+		in: genMakeString(` a b\ c d`),
 		expected: []*MakeString{
-			SimpleMakeString("a", NoPos),
-			SimpleMakeString("b\\ c", NoPos),
-			SimpleMakeString("d", NoPos),
+			genMakeString("a"),
+			genMakeString(`b\ c`),
+			genMakeString("d"),
 		},
 	},
 	{
-		in: SimpleMakeString("  a\tb\\\t\\ c d  ", NoPos),
+		in: SimpleMakeString("  a\tb"+`\`+"\t"+`\ c d  `, NoPos),
 		expected: []*MakeString{
-			SimpleMakeString("a", NoPos),
-			SimpleMakeString("b\\\t\\ c", NoPos),
-			SimpleMakeString("d", NoPos),
+			genMakeString("a"),
+			genMakeString("b" + `\` + "\t" + `\ c`),
+			genMakeString("d"),
 		},
 	},
 	{
-		in: SimpleMakeString(`a\\ b\\\ c d`, NoPos),
+		in: genMakeString(`a\\ b\\\ c d`),
 		expected: []*MakeString{
-			SimpleMakeString(`a\\`, NoPos),
-			SimpleMakeString(`b\\\ c`, NoPos),
-			SimpleMakeString("d", NoPos),
+			genMakeString(`a\\`),
+			genMakeString(`b\\\ c`),
+			genMakeString("d"),
+		},
+	},
+	{
+		in: genMakeString(`\\ a`),
+		expected: []*MakeString{
+			genMakeString(`\\`),
+			genMakeString("a"),
+		},
+	},
+	{
+		// "  "
+		in: &MakeString{
+			Strings:   []string{" \t \t"},
+			Variables: nil,
+		},
+		expected: []*MakeString{},
+	},
+	{
+		// " a $(X)b c "
+		in: genMakeString(" a ", "X", "b c "),
+		expected: []*MakeString{
+			genMakeString("a"),
+			genMakeString("", "X", "b"),
+			genMakeString("c"),
+		},
+	},
+	{
+		// " a b$(X)c d"
+		in: genMakeString(" a b", "X", "c d"),
+		expected: []*MakeString{
+			genMakeString("a"),
+			genMakeString("b", "X", "c"),
+			genMakeString("d"),
+		},
+	},
+	{
+		// "$(X) $(Y)"
+		in: genMakeString("", "X", " ", "Y", ""),
+		expected: []*MakeString{
+			genMakeString("", "X", ""),
+			genMakeString("", "Y", ""),
+		},
+	},
+	{
+		// " a$(X) b"
+		in: genMakeString(" a", "X", " b"),
+		expected: []*MakeString{
+			genMakeString("a", "X", ""),
+			genMakeString("b"),
+		},
+	},
+	{
+		// "a$(X) b$(Y) "
+		in: genMakeString("a", "X", " b", "Y", " "),
+		expected: []*MakeString{
+			genMakeString("a", "X", ""),
+			genMakeString("b", "Y", ""),
 		},
 	},
 }
@@ -180,3 +226,20 @@
 
 	return strings.Join(ret, "|||")
 }
+
+// generates MakeString from alternating string chunks and variable names,
+// e.g., genMakeString("a", "X", "b") returns MakeString for "a$(X)b"
+func genMakeString(items ...string) *MakeString {
+	n := len(items) / 2
+	if len(items) != (2*n + 1) {
+		panic("genMakeString expects odd number of arguments")
+	}
+
+	ms := &MakeString{Strings: make([]string, n+1), Variables: make([]Variable, n)}
+	ms.Strings[0] = items[0]
+	for i := 1; i <= n; i++ {
+		ms.Variables[i-1] = Variable{Name: SimpleMakeString(items[2*i-1], NoPos)}
+		ms.Strings[i] = items[2*i]
+	}
+	return ms
+}