| Lee Campbell | 220ca84 | 2015-07-30 09:27:11 -0700 | [diff] [blame] | 1 | // Copyright (C) 2015 The Android Open Source Project | 
|  | 2 | // | 
|  | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 4 | // you may not use this file except in compliance with the License. | 
|  | 5 | // You may obtain a copy of the License at | 
|  | 6 | // | 
|  | 7 | //      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 8 | // | 
|  | 9 | // Unless required by applicable law or agreed to in writing, software | 
|  | 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 12 | // See the License for the specific language governing permissions and | 
|  | 13 | // limitations under the License. | 
|  | 14 |  | 
|  | 15 | #include "tokenizer.h" | 
|  | 16 |  | 
|  | 17 | namespace init { | 
|  | 18 |  | 
|  | 19 | Tokenizer::Tokenizer(const std::string& data) | 
|  | 20 | : data_(data), eof_(false), pos_(0), tok_start_(0) { | 
|  | 21 | current_.type = TOK_START; | 
|  | 22 |  | 
|  | 23 | if (data.size() > 0) { | 
|  | 24 | cur_char_ = data[0]; | 
|  | 25 | } else { | 
|  | 26 | eof_ = true; | 
|  | 27 | cur_char_ = '\0'; | 
|  | 28 | } | 
|  | 29 | } | 
|  | 30 |  | 
|  | 31 | Tokenizer::~Tokenizer() {} | 
|  | 32 |  | 
|  | 33 | const Tokenizer::Token& Tokenizer::current() { | 
|  | 34 | return current_; | 
|  | 35 | } | 
|  | 36 |  | 
|  | 37 | bool Tokenizer::Next() { | 
|  | 38 | while (!eof_) { | 
|  | 39 | AdvWhiteSpace(); | 
|  | 40 |  | 
|  | 41 | // Check for comments. | 
|  | 42 | if (cur_char_ == '#') { | 
|  | 43 | AdvChar(); | 
|  | 44 | // Skip rest of line | 
|  | 45 | while (!eof_ && cur_char_ != '\n') { | 
|  | 46 | AdvChar(); | 
|  | 47 | } | 
|  | 48 | } | 
|  | 49 |  | 
|  | 50 | if (eof_) { | 
|  | 51 | break; | 
|  | 52 | } | 
|  | 53 |  | 
|  | 54 | if (cur_char_ == '\0') { | 
|  | 55 | AdvChar(); | 
|  | 56 | } else if (cur_char_ == '\n') { | 
|  | 57 | current_.type = TOK_NEWLINE; | 
|  | 58 | current_.text.clear(); | 
|  | 59 | AdvChar(); | 
|  | 60 | return true; | 
|  | 61 | } else if (cur_char_ == '\\') { | 
|  | 62 | AdvChar();  // skip backslash | 
|  | 63 | // This is line continuation so | 
|  | 64 | // do not generated TOK_NEWLINE at | 
|  | 65 | // the next \n. | 
|  | 66 | AdvUntil('\n'); | 
|  | 67 | AdvChar();  // skip \n | 
|  | 68 | } else if (cur_char_ == '\"') { | 
|  | 69 | AdvChar(); | 
|  | 70 | StartText(); | 
|  | 71 | // Grab everything until the next quote. | 
|  | 72 | AdvUntil('\"'); | 
|  | 73 | EndText(); | 
|  | 74 | AdvChar();  // skip quote. | 
|  | 75 | return true; | 
|  | 76 | } else { | 
|  | 77 | StartText(); | 
|  | 78 | AdvText(); | 
|  | 79 | EndText(); | 
|  | 80 | return true; | 
|  | 81 | } | 
|  | 82 | } | 
|  | 83 | current_.type = TOK_END; | 
|  | 84 | current_.text.clear(); | 
|  | 85 | return false; | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | void Tokenizer::AdvChar() { | 
|  | 89 | pos_++; | 
|  | 90 | if (pos_ < data_.size()) { | 
|  | 91 | cur_char_ = data_[pos_]; | 
|  | 92 | } else { | 
|  | 93 | eof_ = true; | 
|  | 94 | cur_char_ = '\0'; | 
|  | 95 | } | 
|  | 96 | } | 
|  | 97 |  | 
|  | 98 | void Tokenizer::AdvWhiteSpace() { | 
|  | 99 | while (cur_char_ == '\t' || cur_char_ == '\r' || cur_char_ == ' ') { | 
|  | 100 | AdvChar(); | 
|  | 101 | } | 
|  | 102 | } | 
|  | 103 |  | 
|  | 104 | void Tokenizer::AdvUntil(char x) { | 
|  | 105 | while (!eof_ && cur_char_ != x) { | 
|  | 106 | AdvChar(); | 
|  | 107 | } | 
|  | 108 | } | 
|  | 109 |  | 
|  | 110 | void Tokenizer::AdvText() { | 
|  | 111 | while (cur_char_ != '\t' && cur_char_ != '\r' && cur_char_ != '\0' && | 
|  | 112 | cur_char_ != ' ' && cur_char_ != '\n' && cur_char_ != '#') { | 
|  | 113 | AdvChar(); | 
|  | 114 | } | 
|  | 115 | } | 
|  | 116 |  | 
|  | 117 | void Tokenizer::StartText() { | 
|  | 118 | current_.text.clear(); | 
|  | 119 | tok_start_ = pos_; | 
|  | 120 | current_.type = TOK_TEXT; | 
|  | 121 | } | 
|  | 122 |  | 
|  | 123 | void Tokenizer::EndText() { | 
|  | 124 | if (pos_ != tok_start_) { | 
|  | 125 | current_.text.append(data_, tok_start_, pos_ - tok_start_); | 
|  | 126 | } | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 | }  // namespace init |