Blame - init/tokenizer_test.cpp - android_system_core

blob: 012288458926ab11ad4439792cc511bcbc2532c8 [file] [log] [blame]

Tom Cherry	2a5a4e7	2018-06-26 13:56:34 -0700	[diff] [blame]	1	//
				2	// Copyright (C) 2018 The Android Open Source Project
				3	//
				4	// Licensed under the Apache License, Version 2.0 (the "License");
				5	// you may not use this file except in compliance with the License.
				6	// You may obtain a copy of the License at
				7	//
				8	// http://www.apache.org/licenses/LICENSE-2.0
				9	//
				10	// Unless required by applicable law or agreed to in writing, software
				11	// distributed under the License is distributed on an "AS IS" BASIS,
				12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	// See the License for the specific language governing permissions and
				14	// limitations under the License.
				15	//
				16
				17	#include "tokenizer.h"
				18
				19	#include <string>
				20	#include <vector>
				21
				22	#include <gtest/gtest.h>
				23
				24	namespace android {
				25	namespace init {
				26
				27	namespace {
				28
				29	void RunTest(const std::string& data, const std::vector<std::vector<std::string>>& expected_tokens) {
				30	auto data_copy = std::string{data};
Tom Cherry	85f2bc9	2020-04-10 10:15:30 -0700	[diff] [blame]	31	data_copy.push_back('\n');
Tom Cherry	2a5a4e7	2018-06-26 13:56:34 -0700	[diff] [blame]	32	data_copy.push_back('\0');
				33
				34	parse_state state;
				35	state.line = 0;
				36	state.ptr = data_copy.data();
				37	state.nexttoken = 0;
				38
				39	std::vector<std::string> current_line;
				40	std::vector<std::vector<std::string>> tokens;
				41
				42	while (true) {
				43	switch (next_token(&state)) {
				44	case T_EOF:
				45	EXPECT_EQ(expected_tokens, tokens) << data;
				46	return;
				47	case T_NEWLINE:
				48	tokens.emplace_back(std::move(current_line));
Tom Cherry	247ffbf	2019-07-08 15:09:36 -0700	[diff] [blame]	49	current_line.clear();
Tom Cherry	2a5a4e7	2018-06-26 13:56:34 -0700	[diff] [blame]	50	break;
				51	case T_TEXT:
				52	current_line.emplace_back(state.text);
				53	break;
				54	}
				55	}
				56	}
				57
				58	} // namespace
				59
				60	TEST(tokenizer, null) {
				61	RunTest("", {{}});
				62	}
				63
				64	TEST(tokenizer, simple_oneline) {
				65	RunTest("one two\tthree\rfour", {{"one", "two", "three", "four"}});
				66	}
				67
				68	TEST(tokenizer, simple_multiline) {
				69	RunTest("1 2 3\n4 5 6\n7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}});
				70	}
				71
				72	TEST(tokenizer, preceding_space) {
				73	// Preceding spaces are ignored.
				74	RunTest(" 1 2 3\n\t\t\t\t4 5 6\n\r\r\r\r7 8 9",
				75	{{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}});
				76	}
				77
				78	TEST(tokenizer, comments) {
				79	// Entirely commented lines still produce a T_NEWLINE token for tracking line count.
				80	RunTest("1 2 3\n#4 5 6\n7 8 9", {{"1", "2", "3"}, {}, {"7", "8", "9"}});
				81
				82	RunTest("#1 2 3\n4 5 6\n7 8 9", {{}, {"4", "5", "6"}, {"7", "8", "9"}});
				83
				84	RunTest("1 2 3\n4 5 6\n#7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {}});
				85
				86	RunTest("1 2 #3\n4 #5 6\n#7 8 9", {{"1", "2"}, {"4"}, {}});
				87	}
				88
				89	TEST(tokenizer, control_chars) {
				90	// Literal \n, \r, \t, and \\ produce the control characters \n, \r, \t, and \\ respectively.
				91	// Literal \? produces ? for all other character '?'
				92
				93	RunTest(R"(1 token\ntoken 2)", {{"1", "token\ntoken", "2"}});
				94	RunTest(R"(1 token\rtoken 2)", {{"1", "token\rtoken", "2"}});
				95	RunTest(R"(1 token\ttoken 2)", {{"1", "token\ttoken", "2"}});
				96	RunTest(R"(1 token\\token 2)", {{"1", "token\\token", "2"}});
				97	RunTest(R"(1 token\btoken 2)", {{"1", "tokenbtoken", "2"}});
				98
				99	RunTest(R"(1 token\n 2)", {{"1", "token\n", "2"}});
				100	RunTest(R"(1 token\r 2)", {{"1", "token\r", "2"}});
				101	RunTest(R"(1 token\t 2)", {{"1", "token\t", "2"}});
				102	RunTest(R"(1 token\\ 2)", {{"1", "token\\", "2"}});
				103	RunTest(R"(1 token\b 2)", {{"1", "tokenb", "2"}});
				104
				105	RunTest(R"(1 \ntoken 2)", {{"1", "\ntoken", "2"}});
				106	RunTest(R"(1 \rtoken 2)", {{"1", "\rtoken", "2"}});
				107	RunTest(R"(1 \ttoken 2)", {{"1", "\ttoken", "2"}});
				108	RunTest(R"(1 \\token 2)", {{"1", "\\token", "2"}});
				109	RunTest(R"(1 \btoken 2)", {{"1", "btoken", "2"}});
				110
				111	RunTest(R"(1 \n 2)", {{"1", "\n", "2"}});
				112	RunTest(R"(1 \r 2)", {{"1", "\r", "2"}});
				113	RunTest(R"(1 \t 2)", {{"1", "\t", "2"}});
				114	RunTest(R"(1 \\ 2)", {{"1", "\\", "2"}});
				115	RunTest(R"(1 \b 2)", {{"1", "b", "2"}});
				116	}
				117
				118	TEST(tokenizer, cr_lf) {
				119	// \ before \n, \r, or \r\n is interpreted as a line continuation
				120	// Extra whitespace on the next line is eaten, except \r unlike in the above tests.
				121
				122	RunTest("lf\\\ncont", {{"lfcont"}});
				123	RunTest("lf\\\n \t\t\t\tcont", {{"lfcont"}});
				124
				125	RunTest("crlf\\\r\ncont", {{"crlfcont"}});
				126	RunTest("crlf\\\r\n \t\t\t\tcont", {{"crlfcont"}});
				127
				128	RunTest("cr\\\rcont", {{"crcont"}});
				129
				130	RunTest("lfspace \\\ncont", {{"lfspace", "cont"}});
				131	RunTest("lfspace \\\n \t\t\t\tcont", {{"lfspace", "cont"}});
				132
				133	RunTest("crlfspace \\\r\ncont", {{"crlfspace", "cont"}});
				134	RunTest("crlfspace \\\r\n \t\t\t\tcont", {{"crlfspace", "cont"}});
				135
				136	RunTest("crspace \\\rcont", {{"crspace", "cont"}});
				137	}
				138
				139	TEST(tokenizer, quoted) {
				140	RunTest("\"quoted simple string\"", {{"quoted simple string"}});
				141
				142	// Unterminated quotes just return T_EOF without any T_NEWLINE.
				143	RunTest("\"unterminated quoted string", {});
				144
				145	RunTest("\"1 2 3\"\n \"unterminated quoted string", {{"1 2 3"}});
				146
				147	// Escaping quotes is not allowed and are treated as an unterminated quoted string.
				148	RunTest("\"quoted escaped quote\\\"\"", {});
				149	RunTest("\"quoted escaped\\\" quote\"", {});
				150	RunTest("\"\\\"quoted escaped quote\"", {});
				151
				152	RunTest("\"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n\"",
				153	{{"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n"}});
				154
				155	RunTest("\"quoted simple string\" \"second quoted string\"",
				156	{{"quoted simple string", "second quoted string"}});
				157
				158	RunTest("\"# comment quoted string\"", {{"# comment quoted string"}});
				159
				160	RunTest("\"Adjacent \"\"quoted strings\"", {{"Adjacent quoted strings"}});
				161	}
				162
				163	} // namespace init
				164	} // namespace android