Rewrite removeVarsAndFuncs.
The current version has these bugs:
- Adding a semicolon after a function results in the removal of structures
following the function.
- Function like macros get removed on accident rather than on purpose.
- It removes extern "C" { completely, which might not be a bug, but doesn't
seem right.
I couldn't easily fix any of these problems because the code depends heavily
on the header being correct.
New unit tests added for the function to cover all of these cases.
A follow-on CL will include the updated headers.
Bug: 112290385
Test: Passes all new unit tests.
Test: When run on the current kernel headers, the generated headers are
Test: nearly the same, missing data is being added.
Change-Id: Ib22a5f2e78873544e8a9d54e385af1156b2a72bb
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
index 336a9c8..1ada59e 100755
--- a/libc/kernel/tools/cpp.py
+++ b/libc/kernel/tools/cpp.py
@@ -1037,11 +1037,14 @@
if t.id == '{':
buf += ' {'
result.append(strip_space(buf))
- indent += 2
+ # Do not indent if this is extern "C" {
+ if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
+ indent += 2
buf = ''
newline = True
elif t.id == '}':
- indent -= 2
+ if indent >= 2:
+ indent -= 2
if not newline:
result.append(strip_space(buf))
# Look ahead to determine if it's the end of line.
@@ -1221,133 +1224,140 @@
function declarations are removed. We only accept typedefs and
enum/structs/union declarations.
+ In addition, remove any macros expanding in the headers. Usually,
+ these macros are static inline functions, which is why they are
+ removed.
+
However, we keep the definitions corresponding to the set of known
static inline functions in the set 'keep', which is useful
for optimized byteorder swap functions and stuff like that.
"""
- # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
- # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
- # It could be problematic when we have function-like macros but without
- # '}' following them. It will skip all the tokens/blocks until seeing a
- # '}' as the function end. Fortunately we don't have such cases in the
- # current kernel headers.
+ # state = NORMAL => normal (i.e. LN + spaces)
+ # state = OTHER_DECL => typedef/struct encountered, ends with ";"
+ # state = VAR_DECL => var declaration encountered, ends with ";"
+ # state = FUNC_DECL => func declaration encountered, ends with "}"
+ NORMAL = 0
+ OTHER_DECL = 1
+ VAR_DECL = 2
+ FUNC_DECL = 3
- # state = 0 => normal (i.e. LN + spaces)
- # state = 1 => typedef/struct encountered, ends with ";"
- # state = 2 => var declaration encountered, ends with ";"
- # state = 3 => func declaration encountered, ends with "}"
-
- state = 0
+ state = NORMAL
depth = 0
- blocks2 = []
- skipTokens = False
- for b in self.blocks:
- if b.isDirective():
- blocks2.append(b)
- else:
- n = len(b.tokens)
- i = 0
- if skipTokens:
- first = n
- else:
- first = 0
- while i < n:
- tok = b.tokens[i]
- tokid = tok.id
- # If we are not looking for the start of a new
- # type/var/func, then skip over tokens until
- # we find our terminator, managing the depth of
- # accolades as we go.
- if state > 0:
- terminator = False
- if tokid == '{':
- depth += 1
- elif tokid == '}':
- if depth > 0:
- depth -= 1
- if (depth == 0) and (state == 3):
- terminator = True
- elif tokid == ';' and depth == 0:
- terminator = True
-
- if terminator:
- # we found the terminator
- state = 0
- if skipTokens:
- skipTokens = False
- first = i + 1
-
- i += 1
- continue
-
- # Is it a new type definition, then start recording it
- if tok.id in ['struct', 'typedef', 'enum', 'union',
- '__extension__']:
- state = 1
- i += 1
- continue
-
- # Is it a variable or function definition. If so, first
- # try to determine which type it is, and also extract
- # its name.
- #
- # We're going to parse the next tokens of the same block
- # until we find a semicolon or a left parenthesis.
- #
- # The semicolon corresponds to a variable definition,
- # the left-parenthesis to a function definition.
- #
- # We also assume that the var/func name is the last
- # identifier before the terminator.
- #
- j = i + 1
- ident = ""
- while j < n:
- tokid = b.tokens[j].id
- if tokid == '(': # a function declaration
- state = 3
- break
- elif tokid == ';': # a variable declaration
- state = 2
- break
- if b.tokens[j].kind == TokenKind.IDENTIFIER:
- ident = b.tokens[j].id
- j += 1
-
- if j >= n:
- # This can only happen when the declaration
- # does not end on the current block (e.g. with
- # a directive mixed inside it.
- #
- # We will treat it as malformed because
- # it's very hard to recover from this case
- # without making our parser much more
- # complex.
- #
- logging.debug("### skip unterminated static '%s'",
- ident)
- break
-
- if ident in keep:
- logging.debug("### keep var/func '%s': %s", ident,
- repr(b.tokens[i:j]))
+ blocksToKeep = []
+ blocksInProgress = []
+ blocksOfDirectives = []
+ ident = ""
+ state_token = ""
+ macros = set()
+ for block in self.blocks:
+ if block.isDirective():
+ # Record all macros.
+ if block.directive == 'define':
+ macro_name = block.define_id
+ paren_index = macro_name.find('(')
+ if paren_index == -1:
+ macros.add(macro_name)
else:
- # We're going to skip the tokens for this declaration
- logging.debug("### skip var/func '%s': %s", ident,
- repr(b.tokens[i:j]))
- if i > first:
- blocks2.append(Block(b.tokens[first:i]))
- skipTokens = True
- first = n
+ macros.add(macro_name[0:paren_index])
+ blocksInProgress.append(block)
+ # If this is in a function/variable declaration, we might need
+ # to emit the directives alone, so save them separately.
+ blocksOfDirectives.append(block)
+ continue
- i += 1
+ numTokens = len(block.tokens)
+ lastTerminatorIndex = 0
+ i = 0
+ while i < numTokens:
+ token_id = block.tokens[i].id
+ terminator = False
+ if token_id == '{':
+ depth += 1
+ if (i >= 2 and block.tokens[i-2].id == 'extern' and
+ block.tokens[i-1].id == '"C"'):
+ # For an extern "C" { pretend as though this is depth 0.
+ depth -= 1
+ elif token_id == '}':
+ if depth > 0:
+ depth -= 1
+ if depth == 0:
+ if state == OTHER_DECL:
+ # Loop through until we hit the ';'
+ i += 1
+ while i < numTokens:
+ if block.tokens[i].id == ';':
+ token_id = ';'
+ break
+ i += 1
+ # If we didn't hit the ';', just consider this the
+ # terminator any way.
+ terminator = True
+ elif depth == 0:
+ if token_id == ';':
+ if state == NORMAL:
+ blocksToKeep.extend(blocksInProgress)
+ blocksInProgress = []
+ blocksOfDirectives = []
+ state = FUNC_DECL
+ terminator = True
+ elif (state == NORMAL and token_id == '(' and i >= 1 and
+ block.tokens[i-1].kind == TokenKind.IDENTIFIER and
+ block.tokens[i-1].id in macros):
+ # This is a plain macro being expanded in the header
+ # which needs to be removed.
+ blocksToKeep.extend(blocksInProgress)
+ if lastTerminatorIndex < i - 1:
+ blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
+ blocksInProgress = []
+ blocksOfDirectives = []
- if i > first:
- #print "### final '%s'" % repr(b.tokens[first:i])
- blocks2.append(Block(b.tokens[first:i]))
+ # Skip until we see the terminating ')'
+ i += 1
+ paren_depth = 1
+ while i < numTokens:
+ if block.tokens[i].id == ')':
+ paren_depth -= 1
+ if paren_depth == 0:
+ break
+ elif block.tokens[i].id == '(':
+ paren_depth += 1
+ i += 1
+ lastTerminatorIndex = i + 1
+ elif (state != FUNC_DECL and token_id == '(' and
+ state_token != 'typedef'):
+ blocksToKeep.extend(blocksInProgress)
+ blocksInProgress = []
+ blocksOfDirectives = []
+ state = VAR_DECL
+ elif state == NORMAL and token_id in ['struct', 'typedef',
+ 'enum', 'union',
+ '__extension__']:
+ state = OTHER_DECL
+ state_token = token_id
+ elif block.tokens[i].kind == TokenKind.IDENTIFIER:
+ if state != VAR_DECL or ident == "":
+ ident = token_id
- self.blocks = blocks2
+ if terminator:
+ if state != VAR_DECL and state != FUNC_DECL or ident in keep:
+ blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
+ blocksToKeep.extend(blocksInProgress)
+ else:
+ # Only keep the directives found.
+ blocksToKeep.extend(blocksOfDirectives)
+ lastTerminatorIndex = i + 1
+ blocksInProgress = []
+ blocksOfDirectives = []
+ state = NORMAL
+ ident = ""
+ state_token = ""
+ i += 1
+ if lastTerminatorIndex < numTokens:
+ blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
+ if len(blocksInProgress) > 0:
+ blocksToKeep.extend(blocksInProgress)
+ self.blocks = blocksToKeep
def replaceTokens(self, replacements):
"""Replace tokens according to the given dict."""
@@ -1938,6 +1948,299 @@
expected = ""
self.assertEqual(self.parse(text), expected)
+class FullPathTest(unittest.TestCase):
+ """Test of the full path parsing."""
+
+ def parse(self, text, keep=None):
+ if not keep:
+ keep = set()
+ out = utils.StringOutput()
+ blocks = BlockParser().parse(CppStringTokenizer(text))
+ blocks.removeVarsAndFuncs(keep)
+ blocks.replaceTokens(kernel_token_replacements)
+ blocks.optimizeAll(None)
+ blocks.write(out)
+ return out.get()
+
+ def test_function_removed(self):
+ text = """\
+static inline __u64 function()
+{
+}
+"""
+ expected = ""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_function_removed_with_struct(self):
+ text = """\
+static inline struct something* function()
+{
+}
+"""
+ expected = ""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_function_kept(self):
+ text = """\
+static inline __u64 function()
+{
+}
+"""
+ expected = """\
+static inline __u64 function() {
+}
+"""
+ self.assertEqual(self.parse(text, set(["function"])), expected)
+
+ def test_var_removed(self):
+ text = "__u64 variable;"
+ expected = ""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_var_kept(self):
+ text = "__u64 variable;"
+ expected = "__u64 variable;\n"
+ self.assertEqual(self.parse(text, set(["variable"])), expected)
+
+ def test_keep_function_typedef(self):
+ text = "typedef void somefunction_t(void);"
+ expected = "typedef void somefunction_t(void);\n"
+ self.assertEqual(self.parse(text), expected)
+
+ def test_struct_keep_attribute(self):
+ text = """\
+struct something_s {
+ __u32 s1;
+ __u32 s2;
+} __attribute__((packed));
+"""
+ expected = """\
+struct something_s {
+ __u32 s1;
+ __u32 s2;
+} __attribute__((packed));
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_function_keep_attribute_structs(self):
+ text = """\
+static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
+}
+"""
+ expected = """\
+static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
+}
+"""
+ self.assertEqual(self.parse(text, set(["function"])), expected)
+
+ def test_struct_after_struct(self):
+ text = """\
+struct first {
+};
+
+struct second {
+ unsigned short s1;
+#define SOMETHING 8
+ unsigned short s2;
+};
+"""
+ expected = """\
+struct first {
+};
+struct second {
+ unsigned short s1;
+#define SOMETHING 8
+ unsigned short s2;
+};
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_other_not_removed(self):
+ text = """\
+typedef union {
+ __u64 tu1;
+ __u64 tu2;
+} typedef_name;
+
+union {
+ __u64 u1;
+ __u64 u2;
+};
+
+struct {
+ __u64 s1;
+ __u64 s2;
+};
+
+enum {
+ ENUM1 = 0,
+ ENUM2,
+};
+
+__extension__ typedef __signed__ long long __s64;
+"""
+ expected = """\
+typedef union {
+ __u64 tu1;
+ __u64 tu2;
+} typedef_name;
+union {
+ __u64 u1;
+ __u64 u2;
+};
+struct {
+ __u64 s1;
+ __u64 s2;
+};
+enum {
+ ENUM1 = 0,
+ ENUM2,
+};
+__extension__ typedef __signed__ long long __s64;
+"""
+
+ self.assertEqual(self.parse(text), expected)
+
+ def test_semicolon_after_function(self):
+ text = """\
+static inline __u64 function()
+{
+};
+
+struct should_see {
+ __u32 field;
+};
+"""
+ expected = """\
+struct should_see {
+ __u32 field;
+};
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_define_in_middle_keep(self):
+ text = """\
+enum {
+ ENUM0 = 0x10,
+ ENUM1 = 0x20,
+#define SOMETHING SOMETHING_ELSE
+ ENUM2 = 0x40,
+};
+"""
+ expected = """\
+enum {
+ ENUM0 = 0x10,
+ ENUM1 = 0x20,
+#define SOMETHING SOMETHING_ELSE
+ ENUM2 = 0x40,
+};
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_define_in_middle_remove(self):
+ text = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+ i = 0;
+ {
+ i = 1;
+ }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+ expected = """\
+#define SOMETHING1 SOMETHING_ELSE1
+#define SOMETHING2 SOMETHING_ELSE2
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_define_in_middle_force_keep(self):
+ text = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+ i = 0;
+ {
+ i = 1;
+ }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+ expected = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+ i = 0;
+ {
+ i = 1;
+ }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+ self.assertEqual(self.parse(text, set(["function"])), expected)
+
+ def test_define_before_remove(self):
+ text = """\
+#define SHOULD_BE_KEPT NOTHING1
+#define ANOTHER_TO_KEEP NOTHING2
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+ i = 0;
+ {
+ i = 1;
+ }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+ expected = """\
+#define SHOULD_BE_KEPT NOTHING1
+#define ANOTHER_TO_KEEP NOTHING2
+#define SOMETHING1 SOMETHING_ELSE1
+#define SOMETHING2 SOMETHING_ELSE2
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_extern_C(self):
+ text = """\
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct something {
+};
+
+#if defined(__cplusplus)
+}
+#endif
+"""
+ expected = """\
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct something {
+};
+#ifdef __cplusplus
+}
+#endif
+"""
+ self.assertEqual(self.parse(text), expected)
+
+ def test_macro_definition_removed(self):
+ text = """\
+#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
+MACRO_FUNCTION_NO_PARAMS()
+
+#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
+MACRO_FUNCTION_PARAMS(a = 1)
+
+something that should still be kept
+MACRO_FUNCTION_PARAMS(b)
+"""
+ expected = """\
+#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
+#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
+something that should still be kept
+"""
+ self.assertEqual(self.parse(text), expected)
+
if __name__ == '__main__':
unittest.main()