Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 1 | /* |
Elliott Hughes | dfb74c5 | 2016-10-24 12:53:17 -0700 | [diff] [blame] | 2 | * Copyright (C) 2016 The Android Open Source Project |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
Elliott Hughes | dfb74c5 | 2016-10-24 12:53:17 -0700 | [diff] [blame] | 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "Preprocessor.h" |
| 18 | |
| 19 | #include <err.h> |
| 20 | #include <fcntl.h> |
Josh Gao | 7978634 | 2016-08-12 16:17:22 -0700 | [diff] [blame] | 21 | #include <fts.h> |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 22 | #include <libgen.h> |
| 23 | #include <string.h> |
| 24 | #include <sys/stat.h> |
| 25 | #include <sys/types.h> |
| 26 | #include <unistd.h> |
| 27 | |
| 28 | #include <deque> |
| 29 | #include <fstream> |
| 30 | #include <string> |
| 31 | #include <unordered_map> |
| 32 | |
| 33 | #include <llvm/ADT/StringRef.h> |
| 34 | #include <llvm/ADT/Twine.h> |
Josh Gao | 7978634 | 2016-08-12 16:17:22 -0700 | [diff] [blame] | 35 | #include <llvm/Support/FileSystem.h> |
| 36 | #include <llvm/Support/Path.h> |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 37 | |
| 38 | #include "Arch.h" |
| 39 | #include "DeclarationDatabase.h" |
| 40 | #include "versioner.h" |
| 41 | |
| 42 | using namespace std::string_literals; |
| 43 | |
| 44 | static DeclarationAvailability calculateRequiredGuard(const Declaration& declaration) { |
| 45 | // To avoid redundant macro guards, the availability calculated by this function is the set |
| 46 | // difference of 'targets marked-available' from 'targets the declaration is visible in'. |
| 47 | // For example, a declaration that is visible always and introduced in 9 would return introduced |
| 48 | // in 9, but the same declaration, except only visible in 9+ would return an empty |
| 49 | // DeclarationAvailability. |
| 50 | |
| 51 | // This currently only handles __INTRODUCED_IN. |
| 52 | // TODO: Do the same for __REMOVED_IN. |
| 53 | int global_min_api_visible = 0; |
| 54 | ArchMap<int> arch_visibility; |
| 55 | |
| 56 | for (const auto& it : declaration.availability) { |
| 57 | const CompilationType& type = it.first; |
| 58 | |
| 59 | if (global_min_api_visible == 0 || global_min_api_visible > type.api_level) { |
| 60 | global_min_api_visible = type.api_level; |
| 61 | } |
| 62 | |
| 63 | if (arch_visibility[type.arch] == 0 || arch_visibility[type.arch] > type.api_level) { |
| 64 | arch_visibility[type.arch] = type.api_level; |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | DeclarationAvailability decl_av; |
| 69 | if (!declaration.calculateAvailability(&decl_av)) { |
| 70 | fprintf(stderr, "versioner: failed to calculate availability while preprocessing:\n"); |
| 71 | declaration.dump("", stderr, 2); |
| 72 | exit(1); |
| 73 | } |
| 74 | |
| 75 | D("Calculating required guard for %s:\n", declaration.name.c_str()); |
| 76 | D(" Declaration availability: %s\n", to_string(decl_av).c_str()); |
| 77 | |
| 78 | if (verbose) { |
| 79 | std::string arch_visibility_str; |
| 80 | for (Arch arch : supported_archs) { |
| 81 | if (arch_visibility[arch] != 0) { |
| 82 | arch_visibility_str += to_string(arch); |
| 83 | arch_visibility_str += ": "; |
| 84 | arch_visibility_str += std::to_string(arch_visibility[arch]); |
| 85 | arch_visibility_str += ", "; |
| 86 | } |
| 87 | } |
| 88 | if (!arch_visibility_str.empty()) { |
| 89 | arch_visibility_str.resize(arch_visibility_str.size() - 2); |
| 90 | } |
| 91 | D(" Declaration visibility: global = %d, arch = %s\n", global_min_api_visible, |
| 92 | arch_visibility_str.c_str()); |
| 93 | } |
| 94 | |
| 95 | DeclarationAvailability result = decl_av; |
Josh Gao | 9f7ce3d | 2016-08-15 13:44:37 -0700 | [diff] [blame] | 96 | if (result.global_availability.introduced <= global_min_api_visible) { |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 97 | result.global_availability.introduced = 0; |
| 98 | } |
| 99 | |
| 100 | for (Arch arch : supported_archs) { |
Josh Gao | 9f7ce3d | 2016-08-15 13:44:37 -0700 | [diff] [blame] | 101 | if (result.arch_availability[arch].introduced <= arch_visibility[arch]) { |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 102 | result.arch_availability[arch].introduced = 0; |
| 103 | } |
| 104 | } |
| 105 | |
| 106 | D(" Calculated result: %s\n", to_string(result).c_str()); |
| 107 | D("\n"); |
| 108 | |
| 109 | return result; |
| 110 | } |
| 111 | |
// Reads the file at `path` and returns its contents split on newlines, without the line
// terminators.
//
// Terminates the process (via err) if the file cannot be opened, instead of silently returning an
// empty result that would only surface later as an unrelated-looking failure.
static std::deque<std::string> readFileLines(const std::string& path) {
  std::ifstream is(path.c_str());
  if (!is) {
    err(1, "failed to open '%s' for reading", path.c_str());
  }

  std::deque<std::string> result;
  std::string line;

  while (std::getline(is, line)) {
    // getline overwrites `line` on the next iteration, so moving out of it here is safe.
    result.push_back(std::move(line));
  }

  return result;
}
| 123 | |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 124 | static void writeFileLines(const std::string& path, const std::deque<std::string>& lines) { |
| 125 | if (!mkdirs(dirname(path))) { |
| 126 | err(1, "failed to create directory '%s'", dirname(path).c_str()); |
| 127 | } |
| 128 | |
| 129 | std::ofstream os(path.c_str(), std::ios_base::out | std::ios_base::trunc); |
| 130 | |
| 131 | for (const std::string& line : lines) { |
| 132 | os << line << "\n"; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | using GuardMap = std::map<Location, DeclarationAvailability>; |
| 137 | |
| 138 | static std::string generateGuardCondition(const DeclarationAvailability& avail) { |
| 139 | // Logically orred expressions that constitute the macro guard. |
| 140 | std::vector<std::string> expressions; |
| 141 | static const std::vector<std::pair<std::string, std::set<Arch>>> arch_sets = { |
| 142 | { "", supported_archs }, |
| 143 | { "!defined(__LP64__)", { Arch::arm, Arch::mips, Arch::x86 } }, |
| 144 | { "defined(__LP64__)", { Arch::arm64, Arch::mips64, Arch::x86_64 } }, |
| 145 | { "defined(__mips__)", { Arch::mips, Arch::mips64 } }, |
| 146 | }; |
| 147 | std::map<Arch, std::string> individual_archs = { |
| 148 | { Arch::arm, "defined(__arm__)" }, |
| 149 | { Arch::arm64, "defined(__aarch64__)" }, |
| 150 | { Arch::mips, "defined(__mips__) && !defined(__LP64__)" }, |
| 151 | { Arch::mips64, "defined(__mips__) && defined(__LP64__)" }, |
| 152 | { Arch::x86, "defined(__i386__)" }, |
| 153 | { Arch::x86_64, "defined(__x86_64__)" }, |
| 154 | }; |
| 155 | |
| 156 | auto generate_guard = [](const std::string& arch_expr, int min_version) { |
| 157 | if (min_version == 0) { |
| 158 | return arch_expr; |
| 159 | } |
| 160 | return arch_expr + " && __ANDROID_API__ >= " + std::to_string(min_version); |
| 161 | }; |
| 162 | |
| 163 | D("Generating guard for availability: %s\n", to_string(avail).c_str()); |
| 164 | if (!avail.global_availability.empty()) { |
| 165 | for (Arch arch : supported_archs) { |
| 166 | if (!avail.arch_availability[arch].empty()) { |
| 167 | errx(1, "attempted to generate guard with global and per-arch values: %s", |
| 168 | to_string(avail).c_str()); |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | if (avail.global_availability.introduced == 0) { |
| 173 | fprintf(stderr, "warning: attempted to generate guard with empty availability: %s\n", |
| 174 | to_string(avail).c_str()); |
| 175 | return ""; |
| 176 | } |
| 177 | |
| 178 | if (avail.global_availability.introduced <= 9) { |
| 179 | return ""; |
| 180 | } |
| 181 | |
| 182 | return "__ANDROID_API__ >= "s + std::to_string(avail.global_availability.introduced); |
| 183 | } |
| 184 | |
| 185 | for (const auto& it : arch_sets) { |
| 186 | const std::string& arch_expr = it.first; |
| 187 | const std::set<Arch>& archs = it.second; |
| 188 | |
| 189 | D(" Checking arch set '%s'\n", arch_expr.c_str()); |
| 190 | |
| 191 | int version = avail.arch_availability[*it.second.begin()].introduced; |
| 192 | |
| 193 | // Assume that the entire declaration is declared __INTRODUCED_IN_FUTURE if one arch is. |
| 194 | bool future = avail.arch_availability[*it.second.begin()].future; |
| 195 | |
| 196 | if (future) { |
Josh Gao | 9ae2b9a | 2016-11-08 16:17:45 -0800 | [diff] [blame] | 197 | return "__ANDROID_API__ >= __ANDROID_API_FUTURE__"; |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 198 | } |
| 199 | |
| 200 | // The maximum min_version of the set. |
| 201 | int max_min_version = 0; |
| 202 | for (Arch arch : archs) { |
| 203 | if (arch_min_api[arch] > max_min_version) { |
| 204 | max_min_version = arch_min_api[arch]; |
| 205 | } |
| 206 | |
| 207 | if (avail.arch_availability[arch].introduced != version) { |
| 208 | D(" Skipping arch set, availability for %s doesn't match %s\n", |
| 209 | to_string(*it.second.begin()).c_str(), to_string(arch).c_str()); |
| 210 | goto skip; |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | // If all of the archs in the set have a min_api that satifies version, elide the check. |
| 215 | if (max_min_version >= version) { |
| 216 | version = 0; |
| 217 | } |
| 218 | |
| 219 | expressions.emplace_back(generate_guard(arch_expr, version)); |
| 220 | |
| 221 | D(" Generated expression '%s'\n", expressions.rbegin()->c_str()); |
| 222 | |
| 223 | for (Arch arch : archs) { |
| 224 | individual_archs.erase(arch); |
| 225 | } |
| 226 | |
| 227 | skip: |
| 228 | continue; |
| 229 | } |
| 230 | |
| 231 | for (const auto& it : individual_archs) { |
| 232 | const std::string& arch_expr = it.second; |
| 233 | int introduced = avail.arch_availability[it.first].introduced; |
| 234 | if (introduced == 0) { |
| 235 | expressions.emplace_back(arch_expr); |
| 236 | } else { |
| 237 | expressions.emplace_back(generate_guard(arch_expr, introduced)); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | if (expressions.size() == 0) { |
| 242 | errx(1, "generated empty guard for availability %s", to_string(avail).c_str()); |
| 243 | } else if (expressions.size() == 1) { |
| 244 | return expressions[0]; |
| 245 | } |
| 246 | |
| 247 | return "("s + Join(expressions, ") || (") + ")"; |
| 248 | } |
| 249 | |
| 250 | // Assumes that nothing crazy is happening (e.g. having the semicolon be in a macro) |
| 251 | static FileLocation findNextSemicolon(const std::deque<std::string>& lines, FileLocation start) { |
| 252 | unsigned current_line = start.line; |
| 253 | unsigned current_column = start.column; |
| 254 | while (current_line <= lines.size()) { |
| 255 | size_t result = lines[current_line - 1].find_first_of(';', current_column - 1); |
| 256 | |
| 257 | if (result != std::string::npos) { |
| 258 | FileLocation loc = { |
| 259 | .line = current_line, |
| 260 | .column = unsigned(result) + 1, |
| 261 | }; |
| 262 | |
| 263 | return loc; |
| 264 | } |
| 265 | |
| 266 | ++current_line; |
| 267 | current_column = 0; |
| 268 | } |
| 269 | |
| 270 | errx(1, "failed to find semicolon starting from %u:%u", start.line, start.column); |
| 271 | } |
| 272 | |
| 273 | // Merge adjacent blocks with identical guards. |
| 274 | static void mergeGuards(std::deque<std::string>& file_lines, GuardMap& guard_map) { |
| 275 | if (guard_map.size() < 2) { |
| 276 | return; |
| 277 | } |
| 278 | |
| 279 | auto current = guard_map.begin(); |
| 280 | auto next = current; |
| 281 | ++next; |
| 282 | |
| 283 | while (next != guard_map.end()) { |
| 284 | if (current->second != next->second) { |
| 285 | ++current; |
| 286 | ++next; |
| 287 | continue; |
| 288 | } |
| 289 | |
| 290 | // Scan from the end of current to the beginning of next. |
| 291 | bool in_block_comment = false; |
| 292 | bool valid = true; |
| 293 | |
| 294 | FileLocation current_location = current->first.end; |
| 295 | FileLocation end_location = next->first.start; |
| 296 | |
| 297 | auto nextLine = [¤t_location]() { |
| 298 | ++current_location.line; |
| 299 | current_location.column = 1; |
| 300 | }; |
| 301 | |
| 302 | auto nextCol = [&file_lines, ¤t_location, &nextLine]() { |
| 303 | if (current_location.column == file_lines[current_location.column - 1].length()) { |
| 304 | nextLine(); |
| 305 | } else { |
| 306 | ++current_location.column; |
| 307 | } |
| 308 | }; |
| 309 | |
| 310 | // The end location will point to the semicolon, which we don't want to read, so skip it. |
| 311 | nextCol(); |
| 312 | |
| 313 | while (current_location < end_location) { |
| 314 | const std::string& line = file_lines[current_location.line - 1]; |
| 315 | size_t line_index = current_location.column - 1; |
| 316 | |
| 317 | if (in_block_comment) { |
| 318 | size_t pos = line.find("*/", line_index); |
| 319 | if (pos == std::string::npos) { |
| 320 | D("Didn't find block comment terminator, skipping line\n"); |
| 321 | nextLine(); |
| 322 | continue; |
| 323 | } else { |
| 324 | D("Found block comment terminator\n"); |
| 325 | in_block_comment = false; |
| 326 | current_location.column = pos + 2; |
| 327 | nextCol(); |
| 328 | continue; |
| 329 | } |
| 330 | } else { |
| 331 | size_t pos = line.find_first_not_of(" \t", line_index); |
| 332 | if (pos == std::string::npos) { |
| 333 | nextLine(); |
| 334 | continue; |
| 335 | } |
| 336 | |
| 337 | current_location.column = pos + 1; |
| 338 | if (line[pos] != '/') { |
| 339 | D("Trailing character '%c' is not a slash: %s\n", line[pos], line.substr(pos).c_str()); |
| 340 | valid = false; |
| 341 | break; |
| 342 | } |
| 343 | |
| 344 | nextCol(); |
| 345 | if (line.length() <= pos + 1) { |
| 346 | // Trailing slash at the end of a line? |
| 347 | D("Trailing slash at end of line\n"); |
| 348 | valid = false; |
| 349 | break; |
| 350 | } |
| 351 | |
| 352 | if (line[pos + 1] == '/') { |
| 353 | // C++ style comment |
| 354 | nextLine(); |
| 355 | } else if (line[pos + 1] == '*') { |
| 356 | // Block comment |
| 357 | nextCol(); |
| 358 | in_block_comment = true; |
| 359 | D("In a block comment\n"); |
| 360 | } else { |
| 361 | // Garbage? |
| 362 | D("Unexpected output after /: %s\n", line.substr(pos).c_str()); |
| 363 | valid = false; |
| 364 | break; |
| 365 | } |
| 366 | } |
| 367 | } |
| 368 | |
| 369 | if (!valid) { |
| 370 | D("Not merging blocks %s and %s\n", to_string(current->first).c_str(), |
| 371 | to_string(next->first).c_str()); |
| 372 | ++current; |
| 373 | ++next; |
| 374 | continue; |
| 375 | } |
| 376 | |
| 377 | D("Merging blocks %s and %s\n", to_string(current->first).c_str(), |
| 378 | to_string(next->first).c_str()); |
| 379 | |
| 380 | Location merged = current->first; |
| 381 | merged.end = next->first.end; |
| 382 | |
| 383 | DeclarationAvailability avail = current->second; |
| 384 | |
| 385 | guard_map.erase(current); |
| 386 | guard_map.erase(next); |
| 387 | bool dummy; |
| 388 | std::tie(current, dummy) = guard_map.insert(std::make_pair(merged, avail)); |
| 389 | next = current; |
| 390 | ++next; |
| 391 | } |
| 392 | } |
| 393 | |
| 394 | static void rewriteFile(const std::string& output_path, std::deque<std::string>& file_lines, |
| 395 | const GuardMap& guard_map) { |
| 396 | for (auto it = guard_map.rbegin(); it != guard_map.rend(); ++it) { |
| 397 | const Location& loc = it->first; |
| 398 | const DeclarationAvailability& avail = it->second; |
| 399 | |
| 400 | std::string condition = generateGuardCondition(avail); |
| 401 | if (condition.empty()) { |
| 402 | continue; |
| 403 | } |
| 404 | |
| 405 | std::string prologue = "\n#if "s + condition + "\n"; |
| 406 | std::string epilogue = "\n#endif /* " + condition + " */\n"; |
| 407 | |
| 408 | file_lines[loc.end.line - 1].insert(loc.end.column, epilogue); |
| 409 | file_lines[loc.start.line - 1].insert(loc.start.column - 1, prologue); |
| 410 | } |
| 411 | |
| 412 | printf("Preprocessing %s...\n", output_path.c_str()); |
| 413 | writeFileLines(output_path, file_lines); |
| 414 | } |
| 415 | |
| 416 | bool preprocessHeaders(const std::string& dst_dir, const std::string& src_dir, |
| 417 | HeaderDatabase* database) { |
| 418 | std::unordered_map<std::string, GuardMap> guards; |
| 419 | std::unordered_map<std::string, std::deque<std::string>> file_lines; |
| 420 | |
| 421 | for (const auto& symbol_it : database->symbols) { |
| 422 | const Symbol& symbol = symbol_it.second; |
| 423 | |
| 424 | for (const auto& decl_it : symbol.declarations) { |
| 425 | const Location& location = decl_it.first; |
| 426 | const Declaration& decl = decl_it.second; |
| 427 | |
Josh Gao | fff29fe | 2016-09-07 18:29:08 -0700 | [diff] [blame] | 428 | if (decl.no_guard) { |
| 429 | // No guard required. |
| 430 | continue; |
| 431 | } |
| 432 | |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 433 | DeclarationAvailability macro_guard = calculateRequiredGuard(decl); |
| 434 | if (!macro_guard.empty()) { |
| 435 | guards[location.filename][location] = macro_guard; |
| 436 | } |
| 437 | } |
| 438 | } |
| 439 | |
Josh Gao | a6b8c4e | 2016-08-15 13:04:51 -0700 | [diff] [blame] | 440 | // Copy over the original headers before preprocessing. |
Josh Gao | 7978634 | 2016-08-12 16:17:22 -0700 | [diff] [blame] | 441 | char* fts_paths[2] = { const_cast<char*>(src_dir.c_str()), nullptr }; |
| 442 | FTS* fts = fts_open(fts_paths, FTS_LOGICAL, nullptr); |
| 443 | while (FTSENT* ent = fts_read(fts)) { |
| 444 | llvm::StringRef path = ent->fts_path; |
| 445 | if (!path.startswith(src_dir)) { |
| 446 | err(1, "path '%s' doesn't start with source dir '%s'", ent->fts_path, src_dir.c_str()); |
| 447 | } |
| 448 | |
| 449 | if (ent->fts_info != FTS_F) { |
| 450 | continue; |
| 451 | } |
| 452 | |
| 453 | std::string rel_path = path.substr(src_dir.length() + 1); |
Josh Gao | a6b8c4e | 2016-08-15 13:04:51 -0700 | [diff] [blame] | 454 | std::string dst_path = dst_dir + "/" + rel_path; |
| 455 | llvm::StringRef parent_path = llvm::sys::path::parent_path(dst_path); |
| 456 | if (llvm::sys::fs::create_directories(parent_path)) { |
| 457 | errx(1, "failed to ensure existence of directory '%s'", parent_path.str().c_str()); |
| 458 | } |
| 459 | if (llvm::sys::fs::copy_file(path, dst_path)) { |
| 460 | errx(1, "failed to copy '%s/%s' to '%s'", src_dir.c_str(), path.str().c_str(), |
| 461 | dst_path.c_str()); |
Josh Gao | 7978634 | 2016-08-12 16:17:22 -0700 | [diff] [blame] | 462 | } |
| 463 | } |
| 464 | fts_close(fts); |
| 465 | |
Josh Gao | f8592a3 | 2016-07-26 18:58:27 -0700 | [diff] [blame] | 466 | for (const auto& file_it : guards) { |
| 467 | file_lines[file_it.first] = readFileLines(file_it.first); |
| 468 | } |
| 469 | |
| 470 | for (auto& file_it : guards) { |
| 471 | llvm::StringRef file_path = file_it.first; |
| 472 | GuardMap& orig_guard_map = file_it.second; |
| 473 | |
| 474 | // The end positions given to us are the end of the declaration, which is some point before the |
| 475 | // semicolon. Fix up the end positions by scanning for the next semicolon. |
| 476 | GuardMap guard_map; |
| 477 | for (const auto& it : orig_guard_map) { |
| 478 | Location loc = it.first; |
| 479 | loc.end = findNextSemicolon(file_lines[file_path], loc.end); |
| 480 | guard_map[loc] = it.second; |
| 481 | } |
| 482 | |
| 483 | // TODO: Make sure that the Locations don't overlap. |
| 484 | // TODO: Merge adjacent non-identical guards. |
| 485 | mergeGuards(file_lines[file_path], guard_map); |
| 486 | |
| 487 | if (!file_path.startswith(src_dir)) { |
| 488 | errx(1, "input file %s is not in %s\n", file_path.str().c_str(), src_dir.c_str()); |
| 489 | } |
| 490 | |
| 491 | // rel_path has a leading slash. |
| 492 | llvm::StringRef rel_path = file_path.substr(src_dir.size(), file_path.size() - src_dir.size()); |
| 493 | std::string output_path = (llvm::Twine(dst_dir) + rel_path).str(); |
| 494 | |
| 495 | rewriteFile(output_path, file_lines[file_path], guard_map); |
| 496 | } |
| 497 | |
| 498 | return true; |
| 499 | } |