| Tom Cherry | 2a5a4e7 | 2018-06-26 13:56:34 -0700 | [diff] [blame] | 1 | // | 
 | 2 | // Copyright (C) 2018 The Android Open Source Project | 
 | 3 | // | 
 | 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 5 | // you may not use this file except in compliance with the License. | 
 | 6 | // You may obtain a copy of the License at | 
 | 7 | // | 
 | 8 | //      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 9 | // | 
 | 10 | // Unless required by applicable law or agreed to in writing, software | 
 | 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
 | 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 13 | // See the License for the specific language governing permissions and | 
 | 14 | // limitations under the License. | 
 | 15 | // | 
 | 16 |  | 
 | 17 | #include "tokenizer.h" | 
 | 18 |  | 
 | 19 | #include <string> | 
 | 20 | #include <vector> | 
 | 21 |  | 
 | 22 | #include <gtest/gtest.h> | 
 | 23 |  | 
 | 24 | namespace android { | 
 | 25 | namespace init { | 
 | 26 |  | 
 | 27 | namespace { | 
 | 28 |  | 
 | 29 | void RunTest(const std::string& data, const std::vector<std::vector<std::string>>& expected_tokens) { | 
 | 30 |     auto data_copy = std::string{data}; | 
| Tom Cherry | 85f2bc9 | 2020-04-10 10:15:30 -0700 | [diff] [blame] | 31 |     data_copy.push_back('\n'); | 
| Tom Cherry | 2a5a4e7 | 2018-06-26 13:56:34 -0700 | [diff] [blame] | 32 |     data_copy.push_back('\0'); | 
 | 33 |  | 
 | 34 |     parse_state state; | 
 | 35 |     state.line = 0; | 
 | 36 |     state.ptr = data_copy.data(); | 
 | 37 |     state.nexttoken = 0; | 
 | 38 |  | 
 | 39 |     std::vector<std::string> current_line; | 
 | 40 |     std::vector<std::vector<std::string>> tokens; | 
 | 41 |  | 
 | 42 |     while (true) { | 
 | 43 |         switch (next_token(&state)) { | 
 | 44 |             case T_EOF: | 
 | 45 |                 EXPECT_EQ(expected_tokens, tokens) << data; | 
 | 46 |                 return; | 
 | 47 |             case T_NEWLINE: | 
 | 48 |                 tokens.emplace_back(std::move(current_line)); | 
| Tom Cherry | 247ffbf | 2019-07-08 15:09:36 -0700 | [diff] [blame] | 49 |                 current_line.clear(); | 
| Tom Cherry | 2a5a4e7 | 2018-06-26 13:56:34 -0700 | [diff] [blame] | 50 |                 break; | 
 | 51 |             case T_TEXT: | 
 | 52 |                 current_line.emplace_back(state.text); | 
 | 53 |                 break; | 
 | 54 |         } | 
 | 55 |     } | 
 | 56 | } | 
 | 57 |  | 
 | 58 | }  // namespace | 
 | 59 |  | 
 | 60 | TEST(tokenizer, null) { | 
 | 61 |     RunTest("", {{}}); | 
 | 62 | } | 
 | 63 |  | 
 | 64 | TEST(tokenizer, simple_oneline) { | 
 | 65 |     RunTest("one two\tthree\rfour", {{"one", "two", "three", "four"}}); | 
 | 66 | } | 
 | 67 |  | 
 | 68 | TEST(tokenizer, simple_multiline) { | 
 | 69 |     RunTest("1 2 3\n4 5 6\n7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}}); | 
 | 70 | } | 
 | 71 |  | 
 | 72 | TEST(tokenizer, preceding_space) { | 
 | 73 |     // Preceding spaces are ignored. | 
 | 74 |     RunTest("    1 2 3\n\t\t\t\t4 5 6\n\r\r\r\r7 8 9", | 
 | 75 |             {{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}}); | 
 | 76 | } | 
 | 77 |  | 
 | 78 | TEST(tokenizer, comments) { | 
 | 79 |     // Entirely commented lines still produce a T_NEWLINE token for tracking line count. | 
 | 80 |     RunTest("1 2 3\n#4 5 6\n7 8 9", {{"1", "2", "3"}, {}, {"7", "8", "9"}}); | 
 | 81 |  | 
 | 82 |     RunTest("#1 2 3\n4 5 6\n7 8 9", {{}, {"4", "5", "6"}, {"7", "8", "9"}}); | 
 | 83 |  | 
 | 84 |     RunTest("1 2 3\n4 5 6\n#7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {}}); | 
 | 85 |  | 
 | 86 |     RunTest("1 2 #3\n4 #5 6\n#7 8 9", {{"1", "2"}, {"4"}, {}}); | 
 | 87 | } | 
 | 88 |  | 
 | 89 | TEST(tokenizer, control_chars) { | 
 | 90 |     // Literal \n, \r, \t, and \\ produce the control characters \n, \r, \t, and \\ respectively. | 
 | 91 |     // Literal \? produces ? for all other character '?' | 
 | 92 |  | 
 | 93 |     RunTest(R"(1 token\ntoken 2)", {{"1", "token\ntoken", "2"}}); | 
 | 94 |     RunTest(R"(1 token\rtoken 2)", {{"1", "token\rtoken", "2"}}); | 
 | 95 |     RunTest(R"(1 token\ttoken 2)", {{"1", "token\ttoken", "2"}}); | 
 | 96 |     RunTest(R"(1 token\\token 2)", {{"1", "token\\token", "2"}}); | 
 | 97 |     RunTest(R"(1 token\btoken 2)", {{"1", "tokenbtoken", "2"}}); | 
 | 98 |  | 
 | 99 |     RunTest(R"(1 token\n 2)", {{"1", "token\n", "2"}}); | 
 | 100 |     RunTest(R"(1 token\r 2)", {{"1", "token\r", "2"}}); | 
 | 101 |     RunTest(R"(1 token\t 2)", {{"1", "token\t", "2"}}); | 
 | 102 |     RunTest(R"(1 token\\ 2)", {{"1", "token\\", "2"}}); | 
 | 103 |     RunTest(R"(1 token\b 2)", {{"1", "tokenb", "2"}}); | 
 | 104 |  | 
 | 105 |     RunTest(R"(1 \ntoken 2)", {{"1", "\ntoken", "2"}}); | 
 | 106 |     RunTest(R"(1 \rtoken 2)", {{"1", "\rtoken", "2"}}); | 
 | 107 |     RunTest(R"(1 \ttoken 2)", {{"1", "\ttoken", "2"}}); | 
 | 108 |     RunTest(R"(1 \\token 2)", {{"1", "\\token", "2"}}); | 
 | 109 |     RunTest(R"(1 \btoken 2)", {{"1", "btoken", "2"}}); | 
 | 110 |  | 
 | 111 |     RunTest(R"(1 \n 2)", {{"1", "\n", "2"}}); | 
 | 112 |     RunTest(R"(1 \r 2)", {{"1", "\r", "2"}}); | 
 | 113 |     RunTest(R"(1 \t 2)", {{"1", "\t", "2"}}); | 
 | 114 |     RunTest(R"(1 \\ 2)", {{"1", "\\", "2"}}); | 
 | 115 |     RunTest(R"(1 \b 2)", {{"1", "b", "2"}}); | 
 | 116 | } | 
 | 117 |  | 
 | 118 | TEST(tokenizer, cr_lf) { | 
 | 119 |     // \ before \n, \r, or \r\n is interpreted as a line continuation | 
 | 120 |     // Extra whitespace on the next line is eaten, except \r unlike in the above tests. | 
 | 121 |  | 
 | 122 |     RunTest("lf\\\ncont", {{"lfcont"}}); | 
 | 123 |     RunTest("lf\\\n    \t\t\t\tcont", {{"lfcont"}}); | 
 | 124 |  | 
 | 125 |     RunTest("crlf\\\r\ncont", {{"crlfcont"}}); | 
 | 126 |     RunTest("crlf\\\r\n    \t\t\t\tcont", {{"crlfcont"}}); | 
 | 127 |  | 
 | 128 |     RunTest("cr\\\rcont", {{"crcont"}}); | 
 | 129 |  | 
 | 130 |     RunTest("lfspace \\\ncont", {{"lfspace", "cont"}}); | 
 | 131 |     RunTest("lfspace \\\n    \t\t\t\tcont", {{"lfspace", "cont"}}); | 
 | 132 |  | 
 | 133 |     RunTest("crlfspace \\\r\ncont", {{"crlfspace", "cont"}}); | 
 | 134 |     RunTest("crlfspace \\\r\n    \t\t\t\tcont", {{"crlfspace", "cont"}}); | 
 | 135 |  | 
 | 136 |     RunTest("crspace \\\rcont", {{"crspace", "cont"}}); | 
 | 137 | } | 
 | 138 |  | 
 | 139 | TEST(tokenizer, quoted) { | 
 | 140 |     RunTest("\"quoted simple string\"", {{"quoted simple string"}}); | 
 | 141 |  | 
 | 142 |     // Unterminated quotes just return T_EOF without any T_NEWLINE. | 
 | 143 |     RunTest("\"unterminated quoted string", {}); | 
 | 144 |  | 
 | 145 |     RunTest("\"1 2 3\"\n \"unterminated quoted string", {{"1 2 3"}}); | 
 | 146 |  | 
 | 147 |     // Escaping quotes is not allowed and are treated as an unterminated quoted string. | 
 | 148 |     RunTest("\"quoted escaped quote\\\"\"", {}); | 
 | 149 |     RunTest("\"quoted escaped\\\" quote\"", {}); | 
 | 150 |     RunTest("\"\\\"quoted escaped quote\"", {}); | 
 | 151 |  | 
 | 152 |     RunTest("\"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n\"", | 
 | 153 |             {{"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n"}}); | 
 | 154 |  | 
 | 155 |     RunTest("\"quoted simple string\" \"second quoted string\"", | 
 | 156 |             {{"quoted simple string", "second quoted string"}}); | 
 | 157 |  | 
 | 158 |     RunTest("\"# comment quoted string\"", {{"# comment quoted string"}}); | 
 | 159 |  | 
 | 160 |     RunTest("\"Adjacent \"\"quoted strings\"", {{"Adjacent quoted strings"}}); | 
 | 161 | } | 
 | 162 |  | 
 | 163 | }  // namespace init | 
 | 164 | }  // namespace android |