| Lee Campbell | 220ca84 | 2015-07-30 09:27:11 -0700 | [diff] [blame] | 1 | // Copyright (C) 2015 The Android Open Source Project | 
|  | 2 | // | 
|  | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 4 | // you may not use this file except in compliance with the License. | 
|  | 5 | // You may obtain a copy of the License at | 
|  | 6 | // | 
|  | 7 | //      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 8 | // | 
|  | 9 | // Unless required by applicable law or agreed to in writing, software | 
|  | 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 12 | // See the License for the specific language governing permissions and | 
|  | 13 | // limitations under the License. | 
|  | 14 |  | 
| Lee Campbell | 0658440 | 2015-07-30 18:48:50 -0700 | [diff] [blame] | 15 | #ifndef _INIT_PARSER_TOKENIZER_H | 
|  | 16 | #define _INIT_PARSER_TOKENIZER_H | 
|  | 17 |  | 
| Lee Campbell | 220ca84 | 2015-07-30 09:27:11 -0700 | [diff] [blame] | 18 | #include <string> | 
|  | 19 |  | 
|  | 20 | namespace init { | 
|  | 21 |  | 
|  | 22 | // Used to tokenize a std::string. | 
|  | 23 | // Call Next() to advance through each token until it returns false, | 
|  | 24 | // indicating there are no more tokens left in the string. | 
|  | 25 | // The current token can be accessed with current(), which returns | 
|  | 26 | // a Token. | 
|  | 27 | // Supported tokens are: | 
|  | 28 | // TOK_START - Next() has yet to be called | 
|  | 29 | // TOK_END - At the end of string | 
|  | 30 | // TOK_NEWLINE - The end of a line denoted by \n. | 
|  | 31 | // TOK_TEXT - A word. | 
|  | 32 | // Comments are denoted with '#' and the tokenizer will ignore | 
|  | 33 | // the rest of the line. | 
|  | 34 | // Double quotes can be used to insert whitespace into words. | 
|  | 35 | // A backslash at the end of a line denotes continuation and | 
|  | 36 | // a TOK_NEWLINE will not be generated for that line. | 
class Tokenizer {
public:
    // Constructs a tokenizer over |data|.
    // The constructor is explicit so that a std::string is never silently
    // converted into a Tokenizer.
    // NOTE(review): the class keeps a `const std::string&` member (data_),
    // presumably bound to |data| by the constructor (defined elsewhere) --
    // if so, |data| must outlive the Tokenizer and must not be a temporary.
    explicit Tokenizer(const std::string& data);
    ~Tokenizer();

    // The kinds of token the tokenizer reports; see the class comment for
    // the meaning of each value.
    enum TokenType { TOK_START, TOK_END, TOK_NEWLINE, TOK_TEXT };

    // A single token: its kind plus the associated text.
    struct Token {
        TokenType type;
        std::string text;
    };

    // Returns the current token.
    const Token& current();

    // Move to the next token, returns false at the end of input.
    bool Next();

private:
    // Internal scanning helpers. They are only declared here; their
    // definitions live in the implementation file.
    void GetData();
    void AdvChar();
    void AdvText();
    void AdvUntil(char x);
    void AdvWhiteSpace();
    void StartText();
    void EndText();

    // Non-owning reference to the string being tokenized (see the lifetime
    // note on the constructor).
    const std::string& data_;
    // Token handed out by current().
    Token current_;

    // Scanner state. The exact semantics are set by the implementation file;
    // the names suggest an end-of-input flag, a read position, the character
    // at that position and the start offset of the token being built --
    // TODO confirm against the .cpp.
    bool eof_;
    size_t pos_;
    char cur_char_;
    size_t tok_start_;
};
|  | 71 |  | 
|  | 72 | }  // namespace init | 
| Lee Campbell | 0658440 | 2015-07-30 18:48:50 -0700 | [diff] [blame] | 73 |  | 
|  | 74 | #endif |