|  | #!/usr/bin/python | 
|  | """A glorified C pre-processor parser.""" | 
|  |  | 
|  | import ctypes | 
|  | import logging | 
|  | import os | 
|  | import re | 
|  | import site | 
|  | import utils | 
|  |  | 
|  | top = os.getenv('ANDROID_BUILD_TOP') | 
|  | if top is None: | 
|  | utils.panic('ANDROID_BUILD_TOP not set.\n') | 
|  |  | 
# Set up sys.path so that the libclang python bindings can be imported.
|  | site.addsitedir(os.path.join(top, 'external/clang/bindings/python')) | 
|  |  | 
|  | import clang.cindex | 
|  | from clang.cindex import conf | 
|  | from clang.cindex import Cursor | 
|  | from clang.cindex import CursorKind | 
|  | from clang.cindex import SourceLocation | 
|  | from clang.cindex import SourceRange | 
|  | from clang.cindex import TokenGroup | 
|  | from clang.cindex import TokenKind | 
|  | from clang.cindex import TranslationUnit | 
|  |  | 
# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc.
|  | # Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. | 
|  | clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64')) | 
|  |  | 
|  | from defaults import kCppUndefinedMacro | 
|  | from defaults import kernel_remove_config_macros | 
|  | from defaults import kernel_token_replacements | 
|  |  | 
|  |  | 
|  | debugBlockParser = False | 
|  | debugCppExpr = False | 
|  | debugOptimIf01 = False | 
|  |  | 
|  | ############################################################################### | 
|  | ############################################################################### | 
|  | #####                                                                     ##### | 
|  | #####           C P P   T O K E N S                                       ##### | 
|  | #####                                                                     ##### | 
|  | ############################################################################### | 
|  | ############################################################################### | 
|  |  | 
# The list of supported C-preprocessor tokens, plus a couple of C tokens
# as well.
|  | tokEOF = "\0" | 
|  | tokLN = "\n" | 
|  | tokSTRINGIFY = "#" | 
|  | tokCONCAT = "##" | 
|  | tokLOGICAND = "&&" | 
|  | tokLOGICOR = "||" | 
|  | tokSHL = "<<" | 
|  | tokSHR = ">>" | 
|  | tokEQUAL = "==" | 
|  | tokNEQUAL = "!=" | 
|  | tokLT = "<" | 
|  | tokLTE = "<=" | 
|  | tokGT = ">" | 
|  | tokGTE = ">=" | 
|  | tokELLIPSIS = "..." | 
|  | tokSPACE = " " | 
|  | tokDEFINED = "defined" | 
|  | tokLPAREN = "(" | 
|  | tokRPAREN = ")" | 
|  | tokNOT = "!" | 
|  | tokPLUS = "+" | 
|  | tokMINUS = "-" | 
|  | tokMULTIPLY = "*" | 
|  | tokDIVIDE = "/" | 
|  | tokMODULUS = "%" | 
|  | tokBINAND = "&" | 
|  | tokBINOR = "|" | 
|  | tokBINXOR = "^" | 
|  | tokCOMMA = "," | 
|  | tokLBRACE = "{" | 
|  | tokRBRACE = "}" | 
|  | tokARROW = "->" | 
|  | tokINCREMENT = "++" | 
|  | tokDECREMENT = "--" | 
|  | tokNUMBER = "<number>" | 
|  | tokIDENT = "<ident>" | 
|  | tokSTRING = "<string>" | 
|  |  | 
|  |  | 
|  | class Token(clang.cindex.Token): | 
|  | """A class that represents one token after parsing. | 
|  |  | 
|  | It inherits the class in libclang, with an extra id property to hold the | 
|  | new spelling of the token. The spelling property in the base class is | 
|  | defined as read-only. New names after macro instantiation are saved in | 
|  | their ids now. It also facilitates the renaming of directive optimizations | 
|  | like replacing 'ifndef X' with 'if !defined(X)'. | 
|  |  | 
|  | It also overrides the cursor property of the base class. Because the one | 
|  | in libclang always queries based on a single token, which usually doesn't | 
|  | hold useful information. The cursor in this class can be set by calling | 
|  | CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the | 
|  | base class. | 
|  | """ | 
|  |  | 
|  | def __init__(self, tu=None, group=None, int_data=None, ptr_data=None, | 
|  | cursor=None): | 
|  | clang.cindex.Token.__init__(self) | 
|  | self._id = None | 
|  | self._tu = tu | 
|  | self._group = group | 
|  | self._cursor = cursor | 
|  | # self.int_data and self.ptr_data are from the base class. But | 
|  | # self.int_data doesn't accept a None value. | 
|  | if int_data is not None: | 
|  | self.int_data = int_data | 
|  | self.ptr_data = ptr_data | 
|  |  | 
|  | @property | 
|  | def id(self): | 
|  | """Name of the token.""" | 
|  | if self._id is None: | 
|  | return self.spelling | 
|  | else: | 
|  | return self._id | 
|  |  | 
|  | @id.setter | 
|  | def id(self, new_id): | 
|  | """Setting name of the token.""" | 
|  | self._id = new_id | 
|  |  | 
    @property
    def cursor(self):
        if self._cursor is None:
            # Invoke the base class property's getter explicitly; assigning
            # clang.cindex.Token.cursor itself would store the property
            # object rather than the Cursor value.
            self._cursor = clang.cindex.Token.cursor.fget(self)
        return self._cursor
|  |  | 
|  | @cursor.setter | 
|  | def cursor(self, new_cursor): | 
|  | self._cursor = new_cursor | 
|  |  | 
|  | def __repr__(self): | 
|  | if self.id == 'defined': | 
|  | return self.id | 
|  | elif self.kind == TokenKind.IDENTIFIER: | 
|  | return "(ident %s)" % self.id | 
|  |  | 
|  | return self.id | 
|  |  | 
|  | def __str__(self): | 
|  | return self.id | 
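
# Example (illustrative sketch, not executed): the id property falls back
# to the libclang spelling until a new name is assigned, e.g. during token
# replacement (assumes libclang has been configured as above):
#   tok = CppStringTokenizer("SIGRTMAX").tokens[0]
#   tok.id                    # -> 'SIGRTMAX' (from the base class spelling)
#   tok.id = '__SIGRTMAX'
#   str(tok)                  # -> '__SIGRTMAX'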
|  |  | 
|  |  | 
|  | class BadExpectedToken(Exception): | 
|  | """An exception that will be raised for unexpected tokens.""" | 
|  | pass | 
|  |  | 
|  |  | 
# The __contains__ function in libclang's SourceRange class contains a bug:
# it gives the wrong result for single-line ranges.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
|  | def SourceRange__contains__(self, other): | 
|  | """Determine if a given location is inside the range.""" | 
|  | if not isinstance(other, SourceLocation): | 
|  | return False | 
    if other.file is None and self.start.file is None:
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # different file names
        return False
|  | # same file, in between lines | 
|  | if self.start.line < other.line < self.end.line: | 
|  | return True | 
|  | # same file, same line | 
|  | elif self.start.line == other.line == self.end.line: | 
|  | if self.start.column <= other.column <= self.end.column: | 
|  | return True | 
|  | elif self.start.line == other.line: | 
|  | # same file first line | 
|  | if self.start.column <= other.column: | 
|  | return True | 
|  | elif other.line == self.end.line: | 
|  | # same file last line | 
|  | if other.column <= self.end.column: | 
|  | return True | 
|  | return False | 
|  |  | 
|  |  | 
|  | SourceRange.__contains__ = SourceRange__contains__ | 
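
# Illustrative sketch (not executed): with the patched __contains__, a
# single-line range such as 3:5 - 3:20 now compares columns correctly:
#   a SourceLocation at line 3, column 10 is inside the range -> True
#   a SourceLocation at line 3, column 25 is past the end     -> False
# The stock libclang version got this single-line case wrong.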
|  |  | 
|  |  | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  | #####                                                                      ##### | 
|  | #####           C P P   T O K E N I Z E R                                  ##### | 
|  | #####                                                                      ##### | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  |  | 
|  |  | 
|  | class CppTokenizer(object): | 
|  | """A tokenizer that converts some input text into a list of tokens. | 
|  |  | 
|  | It calls libclang's tokenizer to get the parsed tokens. In addition, it | 
|  | updates the cursor property in each token after parsing, by calling | 
|  | getTokensWithCursors(). | 
|  | """ | 
|  |  | 
|  | clang_flags = ['-E', '-x', 'c'] | 
|  | options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD | 
|  |  | 
|  | def __init__(self): | 
|  | """Initialize a new CppTokenizer object.""" | 
|  | self._indexer = clang.cindex.Index.create() | 
|  | self._tu = None | 
|  | self._index = 0 | 
|  | self.tokens = None | 
|  |  | 
|  | def _getTokensWithCursors(self): | 
|  | """Helper method to return all tokens with their cursors. | 
|  |  | 
|  | The cursor property in a clang Token doesn't provide enough | 
|  | information. Because it is queried based on single token each time | 
|  | without any context, i.e. via calling conf.lib.clang_annotateTokens() | 
|  | with only one token given. So we often see 'INVALID_FILE' in one | 
|  | token's cursor. In this function it passes all the available tokens | 
|  | to get more informative cursors. | 
|  | """ | 
|  |  | 
|  | tokens_memory = ctypes.POINTER(clang.cindex.Token)() | 
|  | tokens_count = ctypes.c_uint() | 
|  |  | 
|  | conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent, | 
|  | ctypes.byref(tokens_memory), | 
|  | ctypes.byref(tokens_count)) | 
|  |  | 
|  | count = int(tokens_count.value) | 
|  |  | 
        # If we get no tokens, no memory was allocated. Be sure not to
        # return anything, to avoid calling a destructor on nothing.
|  | if count < 1: | 
|  | return | 
|  |  | 
|  | cursors = (Cursor * count)() | 
|  | cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor)) | 
|  |  | 
|  | conf.lib.clang_annotateTokens(self._tu, tokens_memory, count, | 
|  | cursors_memory) | 
|  |  | 
|  | tokens_array = ctypes.cast( | 
|  | tokens_memory, | 
|  | ctypes.POINTER(clang.cindex.Token * count)).contents | 
|  | token_group = TokenGroup(self._tu, tokens_memory, tokens_count) | 
|  |  | 
|  | tokens = [] | 
|  | for i in xrange(0, count): | 
|  | token = Token(self._tu, token_group, | 
|  | int_data=tokens_array[i].int_data, | 
|  | ptr_data=tokens_array[i].ptr_data, | 
|  | cursor=cursors[i]) | 
|  | # We only want non-comment tokens. | 
|  | if token.kind != TokenKind.COMMENT: | 
|  | tokens.append(token) | 
|  |  | 
|  | return tokens | 
|  |  | 
|  | def parseString(self, lines): | 
|  | """Parse a list of text lines into a BlockList object.""" | 
|  | file_ = 'dummy.c' | 
|  | self._tu = self._indexer.parse(file_, self.clang_flags, | 
|  | unsaved_files=[(file_, lines)], | 
|  | options=self.options) | 
|  | self.tokens = self._getTokensWithCursors() | 
|  |  | 
|  | def parseFile(self, file_): | 
|  | """Parse a file into a BlockList object.""" | 
|  | self._tu = self._indexer.parse(file_, self.clang_flags, | 
|  | options=self.options) | 
|  | self.tokens = self._getTokensWithCursors() | 
|  |  | 
|  | def nextToken(self): | 
|  | """Return next token from the list.""" | 
|  | if self._index < len(self.tokens): | 
|  | t = self.tokens[self._index] | 
|  | self._index += 1 | 
|  | return t | 
|  | else: | 
|  | return None | 
|  |  | 
|  |  | 
|  | class CppStringTokenizer(CppTokenizer): | 
|  | """A CppTokenizer derived class that accepts a string of text as input.""" | 
|  |  | 
|  | def __init__(self, line): | 
|  | CppTokenizer.__init__(self) | 
|  | self.parseString(line) | 
|  |  | 
|  |  | 
|  | class CppFileTokenizer(CppTokenizer): | 
|  | """A CppTokenizer derived class that accepts a file as input.""" | 
|  |  | 
|  | def __init__(self, file_): | 
|  | CppTokenizer.__init__(self) | 
|  | self.parseFile(file_) | 
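
# Illustrative usage sketch (not executed; assumes libclang has been
# configured as above):
#   tokenizer = CppStringTokenizer("#define FOO 1")
#   for tok in tokenizer.tokens:
#       print tok.id, tok.kind    # '#', 'define', 'FOO', '1' and their kinds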
|  |  | 
|  |  | 
|  | # Unit testing | 
|  | # | 
|  | class CppTokenizerTester(object): | 
|  | """A class used to test CppTokenizer classes.""" | 
|  |  | 
|  | def __init__(self, tokenizer=None): | 
|  | self._tokenizer = tokenizer | 
|  | self._token = None | 
|  |  | 
|  | def setTokenizer(self, tokenizer): | 
|  | self._tokenizer = tokenizer | 
|  |  | 
|  | def expect(self, id): | 
|  | self._token = self._tokenizer.nextToken() | 
|  | if self._token is None: | 
|  | tokid = '' | 
|  | else: | 
|  | tokid = self._token.id | 
|  | if tokid == id: | 
|  | return | 
|  | raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % ( | 
|  | tokid, id)) | 
|  |  | 
|  | def expectToken(self, id, line, col): | 
|  | self.expect(id) | 
|  | if self._token.location.line != line: | 
|  | raise BadExpectedToken( | 
|  | "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % ( | 
|  | id, self._token.lineno, line)) | 
|  | if self._token.location.column != col: | 
|  | raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % ( | 
|  | self._token.colno, col)) | 
|  |  | 
|  | def expectTokens(self, tokens): | 
|  | for id, line, col in tokens: | 
|  | self.expectToken(id, line, col) | 
|  |  | 
|  | def expectList(self, list_): | 
|  | for item in list_: | 
|  | self.expect(item) | 
|  |  | 
|  |  | 
|  | def test_CppTokenizer(): | 
|  | tester = CppTokenizerTester() | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)")) | 
|  | tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN, | 
|  | "01923_xy", tokRPAREN]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)")) | 
|  | tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND, | 
|  | "defined", tokLPAREN, "BAZ", tokRPAREN]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("/*\n#\n*/")) | 
|  | tester.expectList([]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("first\nsecond")) | 
|  | tester.expectList(["first", "second"]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("first second\n  third")) | 
|  | tester.expectTokens([("first", 1, 1), | 
|  | ("second", 1, 7), | 
|  | ("third", 2, 3)]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */")) | 
|  | tester.expectTokens([("boo", 1, 1)]) | 
|  |  | 
|  | tester.setTokenizer(CppStringTokenizer("an \\\n example")) | 
|  | tester.expectTokens([("an", 1, 1), | 
|  | ("example", 2, 2)]) | 
|  | return True | 
|  |  | 
|  |  | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  | #####                                                                      ##### | 
|  | #####           C P P   E X P R E S S I O N S                              ##### | 
|  | #####                                                                      ##### | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  |  | 
|  |  | 
|  | class CppExpr(object): | 
|  | """A class that models the condition of #if directives into an expr tree. | 
|  |  | 
|  | Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where | 
|  | "op" is a string describing the operation | 
|  | """ | 
|  |  | 
|  | unaries = ["!", "~"] | 
|  | binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", | 
|  | "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"] | 
|  | precedences = { | 
|  | "?": 1, ":": 1, | 
|  | "||": 2, | 
|  | "&&": 3, | 
|  | "|": 4, | 
|  | "^": 5, | 
|  | "&": 6, | 
|  | "==": 7, "!=": 7, | 
|  | "<": 8, "<=": 8, ">": 8, ">=": 8, | 
|  | "<<": 9, ">>": 9, | 
|  | "+": 10, "-": 10, | 
|  | "*": 11, "/": 11, "%": 11, | 
|  | "!": 12, "~": 12 | 
|  | } | 
|  |  | 
|  | def __init__(self, tokens): | 
|  | """Initialize a CppExpr. 'tokens' must be a CppToken list.""" | 
|  | self.tokens = tokens | 
|  | self._num_tokens = len(tokens) | 
|  | self._index = 0 | 
|  |  | 
|  | if debugCppExpr: | 
|  | print "CppExpr: trying to parse %s" % repr(tokens) | 
|  | self.expr = self.parseExpression(0) | 
|  | if debugCppExpr: | 
|  | print "CppExpr: got " + repr(self.expr) | 
|  | if self._index != self._num_tokens: | 
|  | self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s" | 
|  | % (self._index, self._num_tokens, repr(tokens))) | 
|  |  | 
|  | def throw(self, exception, msg): | 
|  | if self._index < self._num_tokens: | 
|  | tok = self.tokens[self._index] | 
|  | print "%d:%d: %s" % (tok.location.line, tok.location.column, msg) | 
|  | else: | 
|  | print "EOF: %s" % msg | 
|  | raise exception(msg) | 
|  |  | 
    def expectId(self, id):
        """Check that a given token id is at the current position."""
        # Check the bounds before reading the token, so that a truncated
        # expression raises a parse error instead of an IndexError.
        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got end of input"
                       % id)
        token = self.tokens[self._index]
        if token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1
|  |  | 
|  | def is_decimal(self): | 
|  | token = self.tokens[self._index].id | 
        # Strip any trailing integer suffix ('U', 'L', 'UL', 'ULL', ...).
        token = token.rstrip("ULul")
|  | try: | 
|  | val = int(token, 10) | 
|  | self._index += 1 | 
|  | return ('int', val) | 
|  | except ValueError: | 
|  | return None | 
|  |  | 
|  | def is_octal(self): | 
|  | token = self.tokens[self._index].id | 
        # Strip any trailing integer suffix ('U', 'L', 'UL', 'ULL', ...).
        token = token.rstrip("ULul")
|  | if len(token) < 2 or token[0] != '0': | 
|  | return None | 
|  | try: | 
|  | val = int(token, 8) | 
|  | self._index += 1 | 
|  | return ('oct', val) | 
|  | except ValueError: | 
|  | return None | 
|  |  | 
|  | def is_hexadecimal(self): | 
|  | token = self.tokens[self._index].id | 
        # Strip any trailing integer suffix ('U', 'L', 'UL', 'ULL', ...).
        token = token.rstrip("ULul")
|  | if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'): | 
|  | return None | 
|  | try: | 
|  | val = int(token, 16) | 
|  | self._index += 1 | 
|  | return ('hex', val) | 
|  | except ValueError: | 
|  | return None | 
|  |  | 
|  | def is_integer(self): | 
|  | if self.tokens[self._index].kind != TokenKind.LITERAL: | 
|  | return None | 
|  |  | 
|  | c = self.is_hexadecimal() | 
|  | if c: | 
|  | return c | 
|  |  | 
|  | c = self.is_octal() | 
|  | if c: | 
|  | return c | 
|  |  | 
|  | c = self.is_decimal() | 
|  | if c: | 
|  | return c | 
|  |  | 
|  | return None | 
|  |  | 
|  | def is_number(self): | 
|  | t = self.tokens[self._index] | 
|  | if t.id == tokMINUS and self._index + 1 < self._num_tokens: | 
|  | self._index += 1 | 
|  | c = self.is_integer() | 
|  | if c: | 
|  | op, val = c | 
|  | return (op, -val) | 
|  | if t.id == tokPLUS and self._index + 1 < self._num_tokens: | 
|  | self._index += 1 | 
|  | c = self.is_integer() | 
|  | if c: | 
|  | return c | 
|  |  | 
|  | return self.is_integer() | 
|  |  | 
|  | def is_defined(self): | 
|  | t = self.tokens[self._index] | 
|  | if t.id != tokDEFINED: | 
|  | return None | 
|  |  | 
|  | # We have the defined keyword, check the rest. | 
|  | self._index += 1 | 
|  | used_parens = False | 
|  | if (self._index < self._num_tokens and | 
|  | self.tokens[self._index].id == tokLPAREN): | 
|  | used_parens = True | 
|  | self._index += 1 | 
|  |  | 
|  | if self._index >= self._num_tokens: | 
|  | self.throw(BadExpectedToken, | 
|  | "### 'defined' must be followed by macro name or left " | 
|  | "paren") | 
|  |  | 
|  | t = self.tokens[self._index] | 
|  | if t.kind != TokenKind.IDENTIFIER: | 
|  | self.throw(BadExpectedToken, | 
|  | "### 'defined' must be followed by macro name") | 
|  |  | 
|  | self._index += 1 | 
|  | if used_parens: | 
|  | self.expectId(tokRPAREN) | 
|  |  | 
|  | return ("defined", t.id) | 
|  |  | 
|  | def is_call_or_ident(self): | 
|  | if self._index >= self._num_tokens: | 
|  | return None | 
|  |  | 
|  | t = self.tokens[self._index] | 
|  | if t.kind != TokenKind.IDENTIFIER: | 
|  | return None | 
|  |  | 
|  | name = t.id | 
|  |  | 
|  | self._index += 1 | 
|  | if (self._index >= self._num_tokens or | 
|  | self.tokens[self._index].id != tokLPAREN): | 
|  | return ("ident", name) | 
|  |  | 
|  | params = [] | 
|  | depth = 1 | 
|  | self._index += 1 | 
|  | j = self._index | 
|  | while self._index < self._num_tokens: | 
|  | id = self.tokens[self._index].id | 
|  | if id == tokLPAREN: | 
|  | depth += 1 | 
|  | elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): | 
|  | k = self._index | 
|  | param = self.tokens[j:k] | 
|  | params.append(param) | 
|  | if id == tokRPAREN: | 
|  | break | 
|  | j = self._index + 1 | 
|  | elif id == tokRPAREN: | 
|  | depth -= 1 | 
|  | self._index += 1 | 
|  |  | 
|  | if self._index >= self._num_tokens: | 
|  | return None | 
|  |  | 
|  | self._index += 1 | 
|  | return ("call", (name, params)) | 
|  |  | 
|  | # Implements the "precedence climbing" algorithm from | 
|  | # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. | 
|  | # The "classic" algorithm would be fine if we were using a tool to | 
|  | # generate the parser, but we're not. Dijkstra's "shunting yard" | 
|  | # algorithm hasn't been necessary yet. | 
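    #
    # Illustrative trace (not executed): parsing "A + B * C" with
    # minPrecedence=0:
    #   parsePrimary() -> ("ident", "A")
    #   '+' has precedence 10 >= 0, so recurse with minPrecedence 11:
    #     parsePrimary() -> ("ident", "B")
    #     '*' has precedence 11 >= 11, so recurse with minPrecedence 12:
    #       parsePrimary() -> ("ident", "C")
    #   result: ("+", ("ident", "A"), ("*", ("ident", "B"), ("ident", "C")))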
|  |  | 
|  | def parseExpression(self, minPrecedence): | 
|  | if self._index >= self._num_tokens: | 
|  | return None | 
|  |  | 
|  | node = self.parsePrimary() | 
|  | while (self.token() and self.isBinary(self.token()) and | 
|  | self.precedence(self.token()) >= minPrecedence): | 
|  | op = self.token() | 
|  | self.nextToken() | 
|  | rhs = self.parseExpression(self.precedence(op) + 1) | 
|  | node = (op.id, node, rhs) | 
|  |  | 
|  | return node | 
|  |  | 
|  | def parsePrimary(self): | 
|  | op = self.token() | 
|  | if self.isUnary(op): | 
|  | self.nextToken() | 
|  | return (op.id, self.parseExpression(self.precedence(op))) | 
|  |  | 
|  | primary = None | 
|  | if op.id == tokLPAREN: | 
|  | self.nextToken() | 
|  | primary = self.parseExpression(0) | 
|  | self.expectId(tokRPAREN) | 
|  | elif op.id == "?": | 
|  | self.nextToken() | 
|  | primary = self.parseExpression(0) | 
|  | self.expectId(":") | 
|  | elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL: | 
|  | primary = self.is_number() | 
        # Checking for 'defined' needs to come first, because 'defined' is
        # recognized as an IDENTIFIER.
|  | elif op.id == tokDEFINED: | 
|  | primary = self.is_defined() | 
|  | elif op.kind == TokenKind.IDENTIFIER: | 
|  | primary = self.is_call_or_ident() | 
|  | else: | 
|  | self.throw(BadExpectedToken, | 
|  | "didn't expect to see a %s in factor" % ( | 
|  | self.tokens[self._index].id)) | 
|  | return primary | 
|  |  | 
|  | def isBinary(self, token): | 
|  | return token.id in self.binaries | 
|  |  | 
|  | def isUnary(self, token): | 
|  | return token.id in self.unaries | 
|  |  | 
|  | def precedence(self, token): | 
|  | return self.precedences.get(token.id) | 
|  |  | 
|  | def token(self): | 
|  | if self._index >= self._num_tokens: | 
|  | return None | 
|  | return self.tokens[self._index] | 
|  |  | 
|  | def nextToken(self): | 
|  | self._index += 1 | 
|  | if self._index >= self._num_tokens: | 
|  | return None | 
|  | return self.tokens[self._index] | 
|  |  | 
|  | def dump_node(self, e): | 
|  | op = e[0] | 
|  | line = "(" + op | 
|  | if op == "int": | 
|  | line += " %d)" % e[1] | 
|  | elif op == "oct": | 
|  | line += " 0%o)" % e[1] | 
|  | elif op == "hex": | 
|  | line += " 0x%x)" % e[1] | 
|  | elif op == "ident": | 
|  | line += " %s)" % e[1] | 
|  | elif op == "defined": | 
|  | line += " %s)" % e[1] | 
|  | elif op == "call": | 
|  | arg = e[1] | 
|  | line += " %s [" % arg[0] | 
|  | prefix = "" | 
|  | for param in arg[1]: | 
|  | par = "" | 
|  | for tok in param: | 
|  | par += str(tok) | 
|  | line += "%s%s" % (prefix, par) | 
|  | prefix = "," | 
|  | line += "])" | 
|  | elif op in CppExpr.unaries: | 
|  | line += " %s)" % self.dump_node(e[1]) | 
|  | elif op in CppExpr.binaries: | 
|  | line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) | 
|  | else: | 
|  | line += " ?%s)" % repr(e[1]) | 
|  |  | 
|  | return line | 
|  |  | 
|  | def __repr__(self): | 
|  | return self.dump_node(self.expr) | 
|  |  | 
|  | def source_node(self, e): | 
|  | op = e[0] | 
|  | if op == "int": | 
|  | return "%d" % e[1] | 
|  | if op == "hex": | 
|  | return "0x%x" % e[1] | 
|  | if op == "oct": | 
|  | return "0%o" % e[1] | 
|  | if op == "ident": | 
|  | # XXX: should try to expand | 
|  | return e[1] | 
|  | if op == "defined": | 
|  | return "defined(%s)" % e[1] | 
|  |  | 
|  | prec = CppExpr.precedences.get(op, 1000) | 
|  | arg = e[1] | 
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            # Use the actual operator; this used to hard-code '!', which
            # rendered '~' expressions incorrectly.
            if arg_prec < prec:
                return op + "(" + arg_src + ")"
            else:
                return op + arg_src
|  | if op in CppExpr.binaries: | 
|  | arg2 = e[2] | 
|  | arg1_op = arg[0] | 
|  | arg2_op = arg2[0] | 
|  | arg1_src = self.source_node(arg) | 
|  | arg2_src = self.source_node(arg2) | 
|  | if CppExpr.precedences.get(arg1_op, 1000) < prec: | 
|  | arg1_src = "(%s)" % arg1_src | 
|  | if CppExpr.precedences.get(arg2_op, 1000) < prec: | 
|  | arg2_src = "(%s)" % arg2_src | 
|  |  | 
|  | return "%s %s %s" % (arg1_src, op, arg2_src) | 
|  | return "???" | 
|  |  | 
|  | def __str__(self): | 
|  | return self.source_node(self.expr) | 
|  |  | 
|  | @staticmethod | 
|  | def int_node(e): | 
|  | if e[0] in ["int", "oct", "hex"]: | 
|  | return e[1] | 
|  | else: | 
|  | return None | 
|  |  | 
|  | def toInt(self): | 
|  | return self.int_node(self.expr) | 
|  |  | 
|  | def optimize_node(self, e, macros=None): | 
|  | if macros is None: | 
|  | macros = {} | 
|  | op = e[0] | 
|  |  | 
|  | if op == "defined": | 
|  | op, name = e | 
            if name in macros:
|  | if macros[name] == kCppUndefinedMacro: | 
|  | return ("int", 0) | 
|  | else: | 
|  | try: | 
|  | value = int(macros[name]) | 
|  | return ("int", value) | 
|  | except ValueError: | 
|  | return ("defined", macros[name]) | 
|  |  | 
|  | if kernel_remove_config_macros and name.startswith("CONFIG_"): | 
|  | return ("int", 0) | 
|  |  | 
|  | return e | 
|  |  | 
|  | elif op == "ident": | 
|  | op, name = e | 
            if name in macros:
|  | try: | 
|  | value = int(macros[name]) | 
|  | expanded = ("int", value) | 
|  | except ValueError: | 
|  | expanded = ("ident", macros[name]) | 
|  | return self.optimize_node(expanded, macros) | 
|  | return e | 
|  |  | 
|  | elif op == "!": | 
|  | op, v = e | 
|  | v = self.optimize_node(v, macros) | 
|  | if v[0] == "int": | 
|  | if v[1] == 0: | 
|  | return ("int", 1) | 
|  | else: | 
|  | return ("int", 0) | 
|  | return ('!', v) | 
|  |  | 
|  | elif op == "&&": | 
|  | op, l, r = e | 
|  | l = self.optimize_node(l, macros) | 
|  | r = self.optimize_node(r, macros) | 
|  | li = self.int_node(l) | 
|  | ri = self.int_node(r) | 
|  | if li is not None: | 
|  | if li == 0: | 
|  | return ("int", 0) | 
|  | else: | 
|  | return r | 
|  | elif ri is not None: | 
|  | if ri == 0: | 
|  | return ("int", 0) | 
|  | else: | 
|  | return l | 
|  | return (op, l, r) | 
|  |  | 
|  | elif op == "||": | 
|  | op, l, r = e | 
|  | l = self.optimize_node(l, macros) | 
|  | r = self.optimize_node(r, macros) | 
|  | li = self.int_node(l) | 
|  | ri = self.int_node(r) | 
|  | if li is not None: | 
|  | if li == 0: | 
|  | return r | 
|  | else: | 
|  | return ("int", 1) | 
|  | elif ri is not None: | 
|  | if ri == 0: | 
|  | return l | 
|  | else: | 
|  | return ("int", 1) | 
|  | return (op, l, r) | 
|  |  | 
|  | else: | 
|  | return e | 
|  |  | 
|  | def optimize(self, macros=None): | 
|  | if macros is None: | 
|  | macros = {} | 
|  | self.expr = self.optimize_node(self.expr, macros) | 
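
# Example (illustrative): with macros = {"__KERNEL__": kCppUndefinedMacro},
# optimize() reduces the tree for "defined(__KERNEL__) || defined(FOO)"
# from ("||", ("defined", "__KERNEL__"), ("defined", "FOO"))
# down to ("defined", "FOO"), since the "||" of a known 0 and an unknown
# operand is just the unknown operand.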
|  |  | 
|  |  | 
|  | def test_cpp_expr(expr, expected): | 
|  | e = CppExpr(CppStringTokenizer(expr).tokens) | 
|  | s1 = repr(e) | 
|  | if s1 != expected: | 
|  | print ("[FAIL]: expression '%s' generates '%s', should be " | 
|  | "'%s'" % (expr, s1, expected)) | 
|  | global failure_count | 
|  | failure_count += 1 | 
|  |  | 
|  |  | 
|  | def test_cpp_expr_optim(expr, expected, macros=None): | 
|  | if macros is None: | 
|  | macros = {} | 
|  | e = CppExpr(CppStringTokenizer(expr).tokens) | 
|  | e.optimize(macros) | 
|  | s1 = repr(e) | 
|  | if s1 != expected: | 
|  | print ("[FAIL]: optimized expression '%s' generates '%s' with " | 
|  | "macros %s, should be '%s'" % (expr, s1, macros, expected)) | 
|  | global failure_count | 
|  | failure_count += 1 | 
|  |  | 
|  |  | 
|  | def test_cpp_expr_source(expr, expected): | 
|  | e = CppExpr(CppStringTokenizer(expr).tokens) | 
|  | s1 = str(e) | 
|  | if s1 != expected: | 
|  | print ("[FAIL]: source expression '%s' generates '%s', should " | 
|  | "be '%s'" % (expr, s1, expected)) | 
|  | global failure_count | 
|  | failure_count += 1 | 
|  |  | 
|  |  | 
|  | def test_CppExpr(): | 
|  | test_cpp_expr("0", "(int 0)") | 
|  | test_cpp_expr("1", "(int 1)") | 
|  | test_cpp_expr("-5", "(int -5)") | 
|  | test_cpp_expr("+1", "(int 1)") | 
|  | test_cpp_expr("0U", "(int 0)") | 
|  | test_cpp_expr("015", "(oct 015)") | 
|  | test_cpp_expr("015l", "(oct 015)") | 
|  | test_cpp_expr("0x3e", "(hex 0x3e)") | 
|  | test_cpp_expr("(0)", "(int 0)") | 
|  | test_cpp_expr("1 && 1", "(&& (int 1) (int 1))") | 
|  | test_cpp_expr("1 && 0", "(&& (int 1) (int 0))") | 
|  | test_cpp_expr("EXAMPLE", "(ident EXAMPLE)") | 
|  | test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") | 
|  | test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)") | 
|  | test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)") | 
|  | test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))") | 
|  | test_cpp_expr("defined(ABC) || defined(BINGO)", | 
|  | "(|| (defined ABC) (defined BINGO))") | 
|  | test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])") | 
|  | test_cpp_expr("A == 1 || defined(B)", | 
|  | "(|| (== (ident A) (int 1)) (defined B))") | 
|  |  | 
|  | test_cpp_expr_optim("0", "(int 0)") | 
|  | test_cpp_expr_optim("1", "(int 1)") | 
|  | test_cpp_expr_optim("1 && 1", "(int 1)") | 
|  | test_cpp_expr_optim("1 && +1", "(int 1)") | 
|  | test_cpp_expr_optim("0x1 && 01", "(oct 01)") | 
|  | test_cpp_expr_optim("1 && 0", "(int 0)") | 
|  | test_cpp_expr_optim("0 && 1", "(int 0)") | 
|  | test_cpp_expr_optim("0 && 0", "(int 0)") | 
|  | test_cpp_expr_optim("1 || 1", "(int 1)") | 
|  | test_cpp_expr_optim("1 || 0", "(int 1)") | 
|  | test_cpp_expr_optim("0 || 1", "(int 1)") | 
|  | test_cpp_expr_optim("0 || 0", "(int 0)") | 
|  | test_cpp_expr_optim("A", "(ident A)") | 
|  | test_cpp_expr_optim("A", "(int 1)", {"A": 1}) | 
|  | test_cpp_expr_optim("A || B", "(int 1)", {"A": 1}) | 
|  | test_cpp_expr_optim("A || B", "(int 1)", {"B": 1}) | 
|  | test_cpp_expr_optim("A && B", "(ident B)", {"A": 1}) | 
|  | test_cpp_expr_optim("A && B", "(ident A)", {"B": 1}) | 
|  | test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))") | 
|  | test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)") | 
|  | test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") | 
|  | test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)") | 
|  | test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", | 
|  | {"EXAMPLE": "XOWOE"}) | 
|  | test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", | 
|  | {"EXAMPLE": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))") | 
|  | test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))", | 
|  | {"EXAMPLE": "XOWOE"}) | 
|  | test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", | 
|  | {"EXAMPLE": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("defined(A) || defined(B)", | 
|  | "(|| (defined A) (defined B))") | 
|  | test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"}) | 
|  | test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"}) | 
|  | test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", | 
|  | {"B": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", | 
|  | {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("defined(A) && defined(B)", | 
|  | "(&& (defined A) (defined B))") | 
|  | test_cpp_expr_optim("defined(A) && defined(B)", | 
|  | "(defined B)", {"A": "1"}) | 
|  | test_cpp_expr_optim("defined(A) && defined(B)", | 
|  | "(defined A)", {"B": "1"}) | 
|  | test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", | 
|  | {"B": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("defined(A) && defined(B)", | 
|  | "(int 0)", {"A": kCppUndefinedMacro}) | 
|  | test_cpp_expr_optim("A == 1 || defined(B)", | 
|  | "(|| (== (ident A) (int 1)) (defined B))") | 
|  | test_cpp_expr_optim( | 
|  | "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", | 
|  | "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", | 
|  | {"__KERNEL__": kCppUndefinedMacro}) | 
|  |  | 
|  | test_cpp_expr_source("0", "0") | 
|  | test_cpp_expr_source("1", "1") | 
|  | test_cpp_expr_source("1 && 1", "1 && 1") | 
|  | test_cpp_expr_source("1 && 0", "1 && 0") | 
|  | test_cpp_expr_source("0 && 1", "0 && 1") | 
|  | test_cpp_expr_source("0 && 0", "0 && 0") | 
|  | test_cpp_expr_source("1 || 1", "1 || 1") | 
|  | test_cpp_expr_source("1 || 0", "1 || 0") | 
|  | test_cpp_expr_source("0 || 1", "0 || 1") | 
|  | test_cpp_expr_source("0 || 0", "0 || 0") | 
|  | test_cpp_expr_source("EXAMPLE", "EXAMPLE") | 
|  | test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3") | 
|  | test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)") | 
|  | test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)") | 
|  | test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)") | 
|  |  | 
|  |  | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  | #####                                                                      ##### | 
|  | #####          C P P   B L O C K                                           ##### | 
|  | #####                                                                      ##### | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  |  | 
|  |  | 
|  | class Block(object): | 
|  | """A class used to model a block of input source text. | 
|  |  | 
|  | There are two block types: | 
|  | - directive blocks: contain the tokens of a single pre-processor | 
|  | directive (e.g. #if) | 
|  | - text blocks, contain the tokens of non-directive blocks | 
|  |  | 
|  | The cpp parser class below will transform an input source file into a list | 
|  | of Block objects (grouped in a BlockList object for convenience) | 
|  | """ | 
|  |  | 
|  | def __init__(self, tokens, directive=None, lineno=0, identifier=None): | 
|  | """Initialize a new block, if 'directive' is None, it is a text block. | 
|  |  | 
|  | NOTE: This automatically converts '#ifdef MACRO' into | 
|  | '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'. | 
|  | """ | 
|  |  | 
|  | if directive == "ifdef": | 
|  | tok = Token() | 
|  | tok.id = tokDEFINED | 
|  | tokens = [tok] + tokens | 
|  | directive = "if" | 
|  |  | 
|  | elif directive == "ifndef": | 
|  | tok1 = Token() | 
|  | tok2 = Token() | 
|  | tok1.id = tokNOT | 
|  | tok2.id = tokDEFINED | 
|  | tokens = [tok1, tok2] + tokens | 
|  | directive = "if" | 
|  |  | 
|  | self.tokens = tokens | 
|  | self.directive = directive | 
|  | self.define_id = identifier | 
|  | if lineno > 0: | 
|  | self.lineno = lineno | 
|  | else: | 
|  | self.lineno = self.tokens[0].location.line | 
|  |  | 
|  | if self.isIf(): | 
|  | self.expr = CppExpr(self.tokens) | 
|  |  | 
|  | def isDirective(self): | 
|  | """Return True iff this is a directive block.""" | 
|  | return self.directive is not None | 
|  |  | 
|  | def isConditional(self): | 
|  | """Return True iff this is a conditional directive block.""" | 
|  | return self.directive in ["if", "ifdef", "ifndef", "else", "elif", | 
|  | "endif"] | 
|  |  | 
|  | def isDefine(self): | 
|  | """Return the macro name in a #define directive, or None otherwise.""" | 
|  | if self.directive != "define": | 
|  | return None | 
|  | return self.define_id | 
|  |  | 
|  | def isIf(self): | 
|  | """Return True iff this is an #if-like directive block.""" | 
|  | return self.directive in ["if", "ifdef", "ifndef", "elif"] | 
|  |  | 
|  | def isEndif(self): | 
|  | """Return True iff this is an #endif directive block.""" | 
|  | return self.directive == "endif" | 
|  |  | 
|  | def isInclude(self): | 
|  | """Check whether this is a #include directive. | 
|  |  | 
|  | If true, returns the corresponding file name (with brackets or | 
        double-quotes). None otherwise.
|  | """ | 
|  |  | 
|  | if self.directive != "include": | 
|  | return None | 
|  | return ''.join([str(x) for x in self.tokens]) | 
|  |  | 
|  | @staticmethod | 
|  | def format_blocks(tokens, indent=0): | 
|  | """Return the formatted lines of strings with proper indentation.""" | 
|  | newline = True | 
|  | result = [] | 
|  | buf = '' | 
|  | i = 0 | 
|  | while i < len(tokens): | 
|  | t = tokens[i] | 
|  | if t.id == '{': | 
|  | buf += ' {' | 
|  | result.append(strip_space(buf)) | 
|  | indent += 2 | 
|  | buf = '' | 
|  | newline = True | 
|  | elif t.id == '}': | 
|  | indent -= 2 | 
|  | if not newline: | 
|  | result.append(strip_space(buf)) | 
                # Look ahead to determine if it's the end of the line.
|  | if (i + 1 < len(tokens) and | 
|  | (tokens[i+1].id == ';' or | 
|  | tokens[i+1].id in ['else', '__attribute__', | 
|  | '__attribute', '__packed'] or | 
|  | tokens[i+1].kind == TokenKind.IDENTIFIER)): | 
|  | buf = ' ' * indent + '}' | 
|  | newline = False | 
|  | else: | 
|  | result.append(' ' * indent + '}') | 
|  | buf = '' | 
|  | newline = True | 
|  | elif t.id == ';': | 
|  | result.append(strip_space(buf) + ';') | 
|  | buf = '' | 
|  | newline = True | 
|  | # We prefer a new line for each constant in enum. | 
|  | elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL: | 
|  | result.append(strip_space(buf) + ',') | 
|  | buf = '' | 
|  | newline = True | 
|  | else: | 
|  | if newline: | 
|  | buf += ' ' * indent + str(t) | 
|  | else: | 
|  | buf += ' ' + str(t) | 
|  | newline = False | 
|  | i += 1 | 
|  |  | 
|  | if buf: | 
|  | result.append(strip_space(buf)) | 
|  |  | 
|  | return result, indent | 
|  |  | 
|  | def writeWithWarning(self, out, warning, left_count, repeat_count, indent): | 
|  | """Dump the current block with warnings.""" | 
        # removeWhiteSpace() will sometimes create non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and should not be counted for this operation.
        #
|  | if self.directive is None and not self.tokens: | 
|  | return left_count, indent | 
|  |  | 
|  | if self.directive: | 
|  | out.write(str(self) + '\n') | 
|  | left_count -= 1 | 
|  | if left_count == 0: | 
|  | out.write(warning) | 
|  | left_count = repeat_count | 
|  |  | 
|  | else: | 
|  | lines, indent = self.format_blocks(self.tokens, indent) | 
|  | for line in lines: | 
|  | out.write(line + '\n') | 
|  | left_count -= 1 | 
|  | if left_count == 0: | 
|  | out.write(warning) | 
|  | left_count = repeat_count | 
|  |  | 
|  | return left_count, indent | 
|  |  | 
|  | def __repr__(self): | 
|  | """Generate the representation of a given block.""" | 
|  | if self.directive: | 
|  | result = "#%s " % self.directive | 
|  | if self.isIf(): | 
|  | result += repr(self.expr) | 
|  | else: | 
|  | for tok in self.tokens: | 
|  | result += repr(tok) | 
|  | else: | 
|  | result = "" | 
|  | for tok in self.tokens: | 
|  | result += repr(tok) | 
|  |  | 
|  | return result | 
|  |  | 
|  | def __str__(self): | 
|  | """Generate the string representation of a given block.""" | 
|  | if self.directive: | 
|  | # "#if" | 
|  | if self.directive == "if": | 
|  | # small optimization to re-generate #ifdef and #ifndef | 
|  | e = self.expr.expr | 
|  | op = e[0] | 
|  | if op == "defined": | 
|  | result = "#ifdef %s" % e[1] | 
|  | elif op == "!" and e[1][0] == "defined": | 
|  | result = "#ifndef %s" % e[1][1] | 
|  | else: | 
|  | result = "#if " + str(self.expr) | 
|  |  | 
|  | # "#define" | 
|  | elif self.isDefine(): | 
|  | result = "#%s %s" % (self.directive, self.define_id) | 
|  | if self.tokens: | 
|  | result += " " | 
|  | expr = strip_space(' '.join([tok.id for tok in self.tokens])) | 
                    # Remove the space between the macro name and '(' in a
                    # function-like macro.
|  | result += re.sub(r'(\w+) \(', r'\1(', expr) | 
|  |  | 
|  | # "#error" | 
            # Concatenate tokens with a space separator, because they may
            # not be quoted and may have been broken into several tokens.
|  | elif self.directive == "error": | 
|  | result = "#error %s" % ' '.join([tok.id for tok in self.tokens]) | 
|  |  | 
|  | else: | 
|  | result = "#%s" % self.directive | 
|  | if self.tokens: | 
|  | result += " " | 
|  | result += ''.join([tok.id for tok in self.tokens]) | 
|  | else: | 
|  | lines, _ = self.format_blocks(self.tokens) | 
|  | result = '\n'.join(lines) | 
|  |  | 
|  | return result | 
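
# Example (illustrative): str() regenerates the most compact directive
# form, so a Block parsed from '#if defined(__KERNEL__)' prints back as
# '#ifdef __KERNEL__', and one parsed from '#if !defined(FOO)' prints
# back as '#ifndef FOO'.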
|  |  | 
|  |  | 
|  | class BlockList(object): | 
|  | """A convenience class used to hold and process a list of blocks. | 
|  |  | 
|  | It calls the cpp parser to get the blocks. | 
|  | """ | 
|  |  | 
|  | def __init__(self, blocks): | 
|  | self.blocks = blocks | 
|  |  | 
|  | def __len__(self): | 
|  | return len(self.blocks) | 
|  |  | 
|  | def __getitem__(self, n): | 
|  | return self.blocks[n] | 
|  |  | 
|  | def __repr__(self): | 
|  | return repr(self.blocks) | 
|  |  | 
|  | def __str__(self): | 
|  | result = '\n'.join([str(b) for b in self.blocks]) | 
|  | return result | 
|  |  | 
|  | def dump(self): | 
|  | """Dump all the blocks in current BlockList.""" | 
|  | print '##### BEGIN #####' | 
|  | for i, b in enumerate(self.blocks): | 
|  | print '### BLOCK %d ###' % i | 
|  | print b | 
|  | print '##### END #####' | 
|  |  | 
|  | def optimizeIf01(self): | 
|  | """Remove the code between #if 0 .. #endif in a BlockList.""" | 
|  | self.blocks = optimize_if01(self.blocks) | 
|  |  | 
|  | def optimizeMacros(self, macros): | 
|  | """Remove known defined and undefined macros from a BlockList.""" | 
|  | for b in self.blocks: | 
|  | if b.isIf(): | 
|  | b.expr.optimize(macros) | 
|  |  | 
|  | def removeMacroDefines(self, macros): | 
|  | """Remove known macro definitions from a BlockList.""" | 
|  | self.blocks = remove_macro_defines(self.blocks, macros) | 
|  |  | 
|  | def optimizeAll(self, macros): | 
|  | self.optimizeMacros(macros) | 
|  | self.optimizeIf01() | 
|  | return | 
|  |  | 
|  | def findIncludes(self): | 
|  | """Return the list of included files in a BlockList.""" | 
|  | result = [] | 
|  | for b in self.blocks: | 
|  | i = b.isInclude() | 
|  | if i: | 
|  | result.append(i) | 
|  | return result | 
|  |  | 
|  | def write(self, out): | 
|  | out.write(str(self)) | 
|  |  | 
|  | def writeWithWarning(self, out, warning, repeat_count): | 
|  | left_count = repeat_count | 
|  | indent = 0 | 
|  | for b in self.blocks: | 
|  | left_count, indent = b.writeWithWarning(out, warning, left_count, | 
|  | repeat_count, indent) | 
|  |  | 
|  | def removeVarsAndFuncs(self, knownStatics=None): | 
|  | """Remove variable and function declarations. | 
|  |  | 
|  | All extern and static declarations corresponding to variable and | 
        function declarations are removed. We only keep typedefs and
        enum/struct/union declarations.
|  |  | 
|  | However, we keep the definitions corresponding to the set of known | 
|  | static inline functions in the set 'knownStatics', which is useful | 
|  | for optimized byteorder swap functions and stuff like that. | 
|  | """ | 
|  |  | 
        # NOTE: This also removes function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic for function-like macros that are not
        # followed by a '}': the code will skip all the tokens/blocks until
        # it sees a '}' as the function end. Fortunately we don't have such
        # cases in the current kernel headers.
|  |  | 
|  | # state = 0 => normal (i.e. LN + spaces) | 
|  | # state = 1 => typedef/struct encountered, ends with ";" | 
|  | # state = 2 => var declaration encountered, ends with ";" | 
|  | # state = 3 => func declaration encountered, ends with "}" | 
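        #
        # Illustrative walk-through (not executed): for the input
        #   static inline int foo(int x) { return x; }
        # the scan finds '(' after the identifier 'foo' and enters state 3,
        # then skips tokens while tracking '{'/'}' depth until the closing
        # '}' brings the depth back to 0, ending the declaration.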
|  |  | 
|  | if knownStatics is None: | 
|  | knownStatics = set() | 
|  | state = 0 | 
|  | depth = 0 | 
|  | blocks2 = [] | 
|  | skipTokens = False | 
|  | for b in self.blocks: | 
|  | if b.isDirective(): | 
|  | blocks2.append(b) | 
|  | else: | 
|  | n = len(b.tokens) | 
|  | i = 0 | 
|  | if skipTokens: | 
|  | first = n | 
|  | else: | 
|  | first = 0 | 
|  | while i < n: | 
|  | tok = b.tokens[i] | 
|  | tokid = tok.id | 
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, tracking the depth of
                    # braces as we go.
|  | if state > 0: | 
|  | terminator = False | 
|  | if tokid == '{': | 
|  | depth += 1 | 
|  | elif tokid == '}': | 
|  | if depth > 0: | 
|  | depth -= 1 | 
|  | if (depth == 0) and (state == 3): | 
|  | terminator = True | 
|  | elif tokid == ';' and depth == 0: | 
|  | terminator = True | 
|  |  | 
|  | if terminator: | 
|  | # we found the terminator | 
|  | state = 0 | 
|  | if skipTokens: | 
|  | skipTokens = False | 
|  | first = i + 1 | 
|  |  | 
|  | i += 1 | 
|  | continue | 
|  |  | 
                    # Is it a new type definition? Then start recording it.
|  | if tok.id in ['struct', 'typedef', 'enum', 'union', | 
|  | '__extension__']: | 
|  | state = 1 | 
|  | i += 1 | 
|  | continue | 
|  |  | 
                    # Is it a variable or function definition? If so, first
                    # try to determine which kind it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semicolon or a left parenthesis.
                    #
                    # A semicolon corresponds to a variable definition,
                    # a left parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
|  | j = i + 1 | 
|  | ident = "" | 
|  | while j < n: | 
|  | tokid = b.tokens[j].id | 
|  | if tokid == '(':  # a function declaration | 
|  | state = 3 | 
|  | break | 
|  | elif tokid == ';':  # a variable declaration | 
|  | state = 2 | 
|  | break | 
|  | if b.tokens[j].kind == TokenKind.IDENTIFIER: | 
|  | ident = b.tokens[j].id | 
|  | j += 1 | 
|  |  | 
|  | if j >= n: | 
                        # This can only happen when the declaration
                        # does not end in the current block (e.g. with
                        # a directive mixed inside it).
|  | # | 
|  | # We will treat it as malformed because | 
|  | # it's very hard to recover from this case | 
|  | # without making our parser much more | 
|  | # complex. | 
|  | # | 
|  | logging.debug("### skip unterminated static '%s'", | 
|  | ident) | 
|  | break | 
|  |  | 
|  | if ident in knownStatics: | 
|  | logging.debug("### keep var/func '%s': %s", ident, | 
|  | repr(b.tokens[i:j])) | 
|  | else: | 
|  | # We're going to skip the tokens for this declaration | 
|  | logging.debug("### skip var/func '%s': %s", ident, | 
|  | repr(b.tokens[i:j])) | 
|  | if i > first: | 
|  | blocks2.append(Block(b.tokens[first:i])) | 
|  | skipTokens = True | 
|  | first = n | 
|  |  | 
|  | i += 1 | 
|  |  | 
|  | if i > first: | 
|  | # print "### final '%s'" % repr(b.tokens[first:i]) | 
|  | blocks2.append(Block(b.tokens[first:i])) | 
|  |  | 
|  | self.blocks = blocks2 | 
|  |  | 
|  | def replaceTokens(self, replacements): | 
|  | """Replace tokens according to the given dict.""" | 
|  | for b in self.blocks: | 
|  | made_change = False | 
|  | if b.isInclude() is None: | 
|  | for tok in b.tokens: | 
|  | if tok.kind == TokenKind.IDENTIFIER: | 
|  | if tok.id in replacements: | 
|  | tok.id = replacements[tok.id] | 
|  | made_change = True | 
|  |  | 
|  | if b.isDefine() and b.define_id in replacements: | 
|  | b.define_id = replacements[b.define_id] | 
|  | made_change = True | 
|  |  | 
|  | if made_change and b.isIf(): | 
|  | # Keep 'expr' in sync with 'tokens'. | 
|  | b.expr = CppExpr(b.tokens) | 
|  |  | 
|  |  | 
|  | def strip_space(s): | 
|  | """Strip out redundant space in a given string.""" | 
|  |  | 
    # NOTE: This ought to be smarter, and avoid destroying spaces inside
    # string tokens.
|  | replacements = {' . ': '.', | 
|  | ' [': '[', | 
|  | '[ ': '[', | 
|  | ' ]': ']', | 
|  | '( ': '(', | 
|  | ' )': ')', | 
|  | ' ,': ',', | 
|  | '# ': '#', | 
|  | ' ;': ';', | 
|  | '~ ': '~', | 
|  | ' -> ': '->'} | 
|  | result = s | 
|  | for r in replacements: | 
|  | result = result.replace(r, replacements[r]) | 
|  |  | 
|  | # Remove the space between function name and the parenthesis. | 
|  | result = re.sub(r'(\w+) \(', r'\1(', result) | 
|  | return result | 
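
# For example (illustrative):
#   strip_space("foo ( a , b )")    # -> "foo(a, b)"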
|  |  | 
|  |  | 
|  | class BlockParser(object): | 
|  | """A class that converts an input source file into a BlockList object.""" | 
|  |  | 
|  | def __init__(self, tokzer=None): | 
|  | """Initialize a block parser. | 
|  |  | 
|  | The input source is provided through a Tokenizer object. | 
|  | """ | 
|  | self._tokzer = tokzer | 
|  | self._parsed = False | 
|  |  | 
|  | @property | 
|  | def parsed(self): | 
|  | return self._parsed | 
|  |  | 
|  | @staticmethod | 
|  | def _short_extent(extent): | 
|  | return '%d:%d - %d:%d' % (extent.start.line, extent.start.column, | 
|  | extent.end.line, extent.end.column) | 
|  |  | 
|  | def getBlocks(self, tokzer=None): | 
|  | """Return all the blocks parsed.""" | 
|  |  | 
|  | def consume_extent(i, tokens, extent=None, detect_change=False): | 
|  | """Return tokens that belong to the given extent. | 
|  |  | 
|  | It parses all the tokens that follow tokens[i], until getting out | 
|  | of the extent. When detect_change is True, it may terminate early | 
|  | when detecting preprocessing directives inside the extent. | 
|  | """ | 
|  |  | 
|  | result = [] | 
|  | if extent is None: | 
|  | extent = tokens[i].cursor.extent | 
|  |  | 
|  | while i < len(tokens) and tokens[i].location in extent: | 
|  | t = tokens[i] | 
|  | if debugBlockParser: | 
|  | print ' ' * 2, t.id, t.kind, t.cursor.kind | 
|  | if (detect_change and t.cursor.extent != extent and | 
|  | t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE): | 
|  | break | 
|  | result.append(t) | 
|  | i += 1 | 
|  | return (i, result) | 
|  |  | 
|  | def consume_line(i, tokens): | 
|  | """Return tokens that follow tokens[i] in the same line.""" | 
|  | result = [] | 
|  | line = tokens[i].location.line | 
|  | while i < len(tokens) and tokens[i].location.line == line: | 
|  | if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: | 
|  | break | 
|  | result.append(tokens[i]) | 
|  | i += 1 | 
|  | return (i, result) | 
|  |  | 
|  | if tokzer is None: | 
|  | tokzer = self._tokzer | 
|  | tokens = tokzer.tokens | 
|  |  | 
|  | blocks = [] | 
|  | buf = [] | 
|  | i = 0 | 
|  |  | 
|  | while i < len(tokens): | 
|  | t = tokens[i] | 
|  | cursor = t.cursor | 
|  |  | 
|  | if debugBlockParser: | 
|  | print ("%d: Processing [%s], kind=[%s], cursor=[%s], " | 
|  | "extent=[%s]" % (t.location.line, t.spelling, t.kind, | 
|  | cursor.kind, | 
|  | self._short_extent(cursor.extent))) | 
|  |  | 
|  | if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: | 
|  | if buf: | 
|  | blocks.append(Block(buf)) | 
|  | buf = [] | 
|  |  | 
|  | j = i | 
|  | if j + 1 >= len(tokens): | 
|  | raise BadExpectedToken("### BAD TOKEN at %s" % (t.location)) | 
|  | directive = tokens[j+1].id | 
|  |  | 
|  | if directive == 'define': | 
|  | if i+2 >= len(tokens): | 
|  | raise BadExpectedToken("### BAD TOKEN at %s" % | 
|  | (tokens[i].location)) | 
|  |  | 
|  | # Skip '#' and 'define'. | 
|  | extent = tokens[i].cursor.extent | 
|  | i += 2 | 
|  | id = '' | 
                    # We need to separate the id from the remainder of
                    # the line, especially for function-like macros.
|  | if (i + 1 < len(tokens) and tokens[i+1].id == '(' and | 
|  | (tokens[i].location.column + len(tokens[i].spelling) == | 
|  | tokens[i+1].location.column)): | 
|  | while i < len(tokens): | 
|  | id += tokens[i].id | 
|  | if tokens[i].spelling == ')': | 
|  | i += 1 | 
|  | break | 
|  | i += 1 | 
|  | else: | 
|  | id += tokens[i].id | 
|  | # Advance to the next token that follows the macro id | 
|  | i += 1 | 
|  |  | 
|  | (i, ret) = consume_extent(i, tokens, extent=extent) | 
|  | blocks.append(Block(ret, directive=directive, | 
|  | lineno=t.location.line, identifier=id)) | 
|  |  | 
|  | else: | 
|  | (i, ret) = consume_extent(i, tokens) | 
|  | blocks.append(Block(ret[2:], directive=directive, | 
|  | lineno=t.location.line)) | 
|  |  | 
|  | elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: | 
|  | if buf: | 
|  | blocks.append(Block(buf)) | 
|  | buf = [] | 
|  | directive = tokens[i+1].id | 
|  | (i, ret) = consume_extent(i, tokens) | 
|  |  | 
|  | blocks.append(Block(ret[2:], directive=directive, | 
|  | lineno=t.location.line)) | 
|  |  | 
|  | elif cursor.kind == CursorKind.VAR_DECL: | 
|  | if buf: | 
|  | blocks.append(Block(buf)) | 
|  | buf = [] | 
|  |  | 
|  | (i, ret) = consume_extent(i, tokens, detect_change=True) | 
|  | buf += ret | 
|  |  | 
|  | elif cursor.kind == CursorKind.FUNCTION_DECL: | 
|  | if buf: | 
|  | blocks.append(Block(buf)) | 
|  | buf = [] | 
|  |  | 
|  | (i, ret) = consume_extent(i, tokens, detect_change=True) | 
|  | buf += ret | 
|  |  | 
|  | else: | 
|  | (i, ret) = consume_line(i, tokens) | 
|  | buf += ret | 
|  |  | 
|  | if buf: | 
|  | blocks.append(Block(buf)) | 
|  |  | 
        # _parsed=True indicates a successful parse, although it may have
        # produced an empty BlockList.
|  | self._parsed = True | 
|  |  | 
|  | return BlockList(blocks) | 
|  |  | 
|  | def parse(self, tokzer): | 
|  | return self.getBlocks(tokzer) | 
|  |  | 
|  | def parseFile(self, path): | 
|  | return self.getBlocks(CppFileTokenizer(path)) | 
|  |  | 
|  |  | 
|  | def test_block_parsing(lines, expected): | 
|  | """Helper method to test the correctness of BlockParser.parse.""" | 
|  | blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines))) | 
|  | if len(blocks) != len(expected): | 
|  | raise BadExpectedToken("BlockParser.parse() returned '%s' expecting " | 
|  | "'%s'" % (str(blocks), repr(expected))) | 
|  | for n in range(len(blocks)): | 
|  | if str(blocks[n]) != expected[n]: | 
|  | raise BadExpectedToken("BlockParser.parse()[%d] is '%s', " | 
|  | "expecting '%s'" % (n, str(blocks[n]), | 
|  | expected[n])) | 
|  |  | 
|  |  | 
|  | def test_BlockParser(): | 
|  | test_block_parsing(["#error hello"], ["#error hello"]) | 
|  | test_block_parsing(["foo", "", "bar"], ["foo bar"]) | 
|  |  | 
|  | # We currently cannot handle the following case with libclang properly. | 
|  | # Fortunately it doesn't appear in current headers. | 
|  | # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"]) | 
|  |  | 
|  | test_block_parsing(["foo", | 
|  | "  #  /* ahah */ if defined(__KERNEL__) /* more */", | 
|  | "bar", "#endif"], | 
|  | ["foo", "#ifdef __KERNEL__", "bar", "#endif"]) | 
|  |  | 
|  |  | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  | #####                                                                      ##### | 
|  | #####        B L O C K   L I S T   O P T I M I Z A T I O N                 ##### | 
|  | #####                                                                      ##### | 
|  | ################################################################################ | 
|  | ################################################################################ | 
|  |  | 
|  |  | 
|  | def remove_macro_defines(blocks, excludedMacros=None): | 
|  | """Remove macro definitions like #define <macroName>  ....""" | 
|  | if excludedMacros is None: | 
|  | excludedMacros = set() | 
|  | result = [] | 
|  | for b in blocks: | 
|  | macroName = b.isDefine() | 
|  | if macroName is None or macroName not in excludedMacros: | 
|  | result.append(b) | 
|  |  | 
|  | return result | 
|  |  | 
|  |  | 
|  | def find_matching_endif(blocks, i): | 
|  | """Traverse the blocks to find out the matching #endif.""" | 
|  | n = len(blocks) | 
|  | depth = 1 | 
|  | while i < n: | 
|  | if blocks[i].isDirective(): | 
|  | dir_ = blocks[i].directive | 
|  | if dir_ in ["if", "ifndef", "ifdef"]: | 
|  | depth += 1 | 
|  | elif depth == 1 and dir_ in ["else", "elif"]: | 
|  | return i | 
|  | elif dir_ == "endif": | 
|  | depth -= 1 | 
|  | if depth == 0: | 
|  | return i | 
|  | i += 1 | 
|  | return i | 
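
# Illustrative example (not executed): given the block sequence
#   0: #if A   1: text   2: #if B   3: #endif   4: #else   5: #endif
# find_matching_endif(blocks, 1) skips the nested #if/#endif pair and
# returns 4, the index of the '#else' belonging to the outer #if.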
|  |  | 
|  |  | 
|  | def optimize_if01(blocks): | 
|  | """Remove the code between #if 0 .. #endif in a list of CppBlocks.""" | 
|  | i = 0 | 
|  | n = len(blocks) | 
|  | result = [] | 
|  | while i < n: | 
|  | j = i | 
|  | while j < n and not blocks[j].isIf(): | 
|  | j += 1 | 
|  | if j > i: | 
|  | logging.debug("appending lines %d to %d", blocks[i].lineno, | 
|  | blocks[j-1].lineno) | 
|  | result += blocks[i:j] | 
|  | if j >= n: | 
|  | break | 
|  | expr = blocks[j].expr | 
|  | r = expr.toInt() | 
|  | if r is None: | 
|  | result.append(blocks[j]) | 
|  | i = j + 1 | 
|  | continue | 
|  |  | 
|  | if r == 0: | 
|  | # if 0 => skip everything until the corresponding #endif | 
|  | j = find_matching_endif(blocks, j + 1) | 
|  | if j >= n: | 
|  | # unterminated #if 0, finish here | 
|  | break | 
|  | dir_ = blocks[j].directive | 
|  | if dir_ == "endif": | 
|  | logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)", | 
|  | blocks[i].lineno, blocks[j].lineno) | 
|  | i = j + 1 | 
|  | elif dir_ == "else": | 
|  | # convert 'else' into 'if 1' | 
|  | logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d " | 
|  | "to %d)", blocks[i].lineno, blocks[j-1].lineno) | 
|  | blocks[j].directive = "if" | 
|  | blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens) | 
|  | i = j | 
|  | elif dir_ == "elif": | 
|  | # convert 'elif' into 'if' | 
|  | logging.debug("convert 'if 0' .. 'elif' into 'if'") | 
|  | blocks[j].directive = "if" | 
|  | i = j | 
|  | continue | 
|  |  | 
|  | # if 1 => find corresponding endif and remove/transform them | 
|  | k = find_matching_endif(blocks, j + 1) | 
|  | if k >= n: | 
|  | # unterminated #if 1, finish here | 
|  | logging.debug("unterminated 'if 1'") | 
|  | result += blocks[j+1:k] | 
|  | break | 
|  |  | 
|  | dir_ = blocks[k].directive | 
|  | if dir_ == "endif": | 
|  | logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)", | 
|  | blocks[j].lineno, blocks[k].lineno) | 
|  | result += optimize_if01(blocks[j+1:k]) | 
|  | i = k + 1 | 
|  | elif dir_ == "else": | 
|  | # convert 'else' into 'if 0' | 
|  | logging.debug("convert 'if 1' .. 'else' (lines %d to %d)", | 
|  | blocks[j].lineno, blocks[k].lineno) | 
|  | result += optimize_if01(blocks[j+1:k]) | 
|  | blocks[k].directive = "if" | 
|  | blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) | 
|  | i = k | 
|  | elif dir_ == "elif": | 
|  | # convert 'elif' into 'if 0' | 
|  | logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)", | 
|  | blocks[j].lineno, blocks[k].lineno) | 
|  | result += optimize_if01(blocks[j+1:k]) | 
|  | blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) | 
|  | i = k | 
|  | return result | 
|  |  | 
|  |  | 
|  | def test_optimizeAll(): | 
|  | text = """\ | 
|  | #if 1 | 
|  | #define  GOOD_1 | 
|  | #endif | 
|  | #if 0 | 
|  | #define  BAD_2 | 
|  | #define  BAD_3 | 
|  | #endif | 
|  |  | 
|  | #if 1 | 
|  | #define  GOOD_2 | 
|  | #else | 
|  | #define  BAD_4 | 
|  | #endif | 
|  |  | 
|  | #if 0 | 
|  | #define  BAD_5 | 
|  | #else | 
|  | #define  GOOD_3 | 
|  | #endif | 
|  |  | 
|  | #if defined(__KERNEL__) | 
|  | #define BAD_KERNEL | 
|  | #endif | 
|  |  | 
|  | #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) | 
|  | #define X | 
|  | #endif | 
|  |  | 
|  | #ifndef SIGRTMAX | 
|  | #define SIGRTMAX 123 | 
|  | #endif /* SIGRTMAX */ | 
|  |  | 
|  | #if 0 | 
|  | #if 1 | 
|  | #define  BAD_6 | 
|  | #endif | 
|  | #endif\ | 
|  | """ | 
|  |  | 
|  | expected = """\ | 
|  | #define GOOD_1 | 
|  | #define GOOD_2 | 
|  | #define GOOD_3 | 
|  | #if !defined(__GLIBC__) || __GLIBC__ < 2 | 
|  | #define X | 
|  | #endif | 
|  | #ifndef __SIGRTMAX | 
|  | #define __SIGRTMAX 123 | 
|  | #endif\ | 
|  | """ | 
|  |  | 
|  | out = utils.StringOutput() | 
|  | blocks = BlockParser().parse(CppStringTokenizer(text)) | 
|  | blocks.replaceTokens(kernel_token_replacements) | 
|  | blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro}) | 
|  | blocks.write(out) | 
|  | if out.get() != expected: | 
|  | print "[FAIL]: macro optimization failed\n" | 
|  | print "<<<< expecting '", | 
|  | print expected, | 
|  | print "'\n>>>> result '", | 
|  | print out.get(), | 
|  | print "'\n----" | 
|  | global failure_count | 
|  | failure_count += 1 | 
|  |  | 
|  |  | 
|  | def runUnitTests(): | 
|  | """Always run all unit tests for this program.""" | 
|  | test_CppTokenizer() | 
|  | test_CppExpr() | 
|  | test_optimizeAll() | 
|  | test_BlockParser() | 
|  |  | 
|  |  | 
|  | failure_count = 0 | 
|  | runUnitTests() | 
|  | if failure_count != 0: | 
|  | utils.panic("Unit tests failed in cpp.py.\n") |