init: Add C++ tokenizer.

Adds a C++ tokenizer along with unit tests.

This tokenizer will replace the current C implementation
which does a poor job of keeping track of pointers.

This CL is a prerequisite for up coming changes to
the parser. This CL does not wire up this tokenizer and
changes no exsiting code. All that builds is the unit tests.

Change-Id: Iec3740bce7153640adc5e5bbdc57e644cedf0038
TEST: Unit tests all pass. No leaks under valgrind
BUG: 22843198
diff --git a/init/parser/tokenizer.h b/init/parser/tokenizer.h
new file mode 100644
index 0000000..40a22b1
--- /dev/null
+++ b/init/parser/tokenizer.h
@@ -0,0 +1,69 @@
+// Copyright (C) 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+
+namespace init {
+
+// Used to tokenize a std::string.
+// Call Next() to advance through each token until it returns false,
+// indicating there are no more tokens left in the string.
+// The current token can be accessed with current(), which returns
+// a Token.
+// Supported tokens are:
+// TOK_START - Next() has yet to be called
+// TOK_END - At the end of string
+// TOK_NEWLINE - The end of a line denoted by \n.
+// TOK_TEXT - A word.
+// Comments are denoted with '#' and the tokenizer will ignore
+// the rest of the line.
+// Double quotes can be used to insert whitespace into words.
+// A backslash at the end of a line denotes continuation and
+// a TOK_NEWLINE will not be generated for that line.
+class Tokenizer {
+ public:
+  Tokenizer(const std::string& data);
+  ~Tokenizer();
+
+  enum TokenType { TOK_START, TOK_END, TOK_NEWLINE, TOK_TEXT };
+  struct Token {
+    TokenType type;
+    std::string text;
+  };
+
+  // Returns the curret token.
+  const Token& current();
+
+  // Move to the next token, returns false at the end of input.
+  bool Next();
+
+ private:
+  void GetData();
+  void AdvChar();
+  void AdvText();
+  void AdvUntil(char x);
+  void AdvWhiteSpace();
+  void StartText();
+  void EndText();
+
+  const std::string& data_;
+  Token current_;
+
+  bool eof_;
+  size_t pos_;
+  char cur_char_;
+  size_t tok_start_;
+};
+
+}  // namespace init