reduce forest generation to be incremental
Previously, symlink forest generation involved removing the entire
symlink forest and recreating it from scratch. With this change,
a) symlinks which need not change are untouched,
b) symlinks pointing to the wrong location are fixed, and
c) symlinks which should no longer exist are removed.
On AOSP on my local machine, this reduces the symlink forest generation
step from 2.5s to 1.1s clean, and 0.6s when a single file is added to
a source directory.
Bug: 257528847
Test: m bp2build, touch `fakefile` under the forest, remove a file
from the source tree, rerun m bp2build. Manually verify the new forest
does not retain the link to the deleted source file, and that fakefile
no longer exists in the forest.
Change-Id: I481371ae487e9419af6a3a4370c552578b07d650
diff --git a/bp2build/symlink_forest.go b/bp2build/symlink_forest.go
index 37188f1..183eb12 100644
--- a/bp2build/symlink_forest.go
+++ b/bp2build/symlink_forest.go
@@ -15,9 +15,7 @@
package bp2build
import (
- "errors"
"fmt"
- "io/fs"
"io/ioutil"
"os"
"path/filepath"
@@ -53,59 +51,6 @@
symlinkCount atomic.Uint64
}
-// A simple thread pool to limit concurrency on system calls.
-// Necessary because Go spawns a new OS-level thread for each blocking system
-// call. This means that if syscalls are too slow and there are too many of
-// them, the hard limit on OS-level threads can be exhausted.
-type syscallPool struct {
- shutdownCh []chan<- struct{}
- workCh chan syscall
-}
-
-type syscall struct {
- work func()
- done chan<- struct{}
-}
-
-func createSyscallPool(count int) *syscallPool {
- result := &syscallPool{
- shutdownCh: make([]chan<- struct{}, count),
- workCh: make(chan syscall),
- }
-
- for i := 0; i < count; i++ {
- shutdownCh := make(chan struct{})
- result.shutdownCh[i] = shutdownCh
- go result.worker(shutdownCh)
- }
-
- return result
-}
-
-func (p *syscallPool) do(work func()) {
- doneCh := make(chan struct{})
- p.workCh <- syscall{work, doneCh}
- <-doneCh
-}
-
-func (p *syscallPool) shutdown() {
- for _, ch := range p.shutdownCh {
- ch <- struct{}{} // Blocks until the value is received
- }
-}
-
-func (p *syscallPool) worker(shutdownCh <-chan struct{}) {
- for {
- select {
- case <-shutdownCh:
- return
- case work := <-p.workCh:
- work.work()
- work.done <- struct{}{}
- }
- }
-}
-
// Ensures that the node for the given path exists in the tree and returns it.
func ensureNodeExists(root *instructionsNode, path string) *instructionsNode {
if path == "" {
@@ -217,12 +162,35 @@
}
// Creates a symbolic link at dst pointing to src
-func symlinkIntoForest(topdir, dst, src string) {
- err := os.Symlink(shared.JoinPath(topdir, src), shared.JoinPath(topdir, dst))
- if err != nil {
+func symlinkIntoForest(topdir, dst, src string) uint64 {
+ srcPath := shared.JoinPath(topdir, src)
+ dstPath := shared.JoinPath(topdir, dst)
+
+ // Check if a symlink already exists.
+ if dstInfo, err := os.Lstat(dstPath); err != nil {
+ if !os.IsNotExist(err) {
+ fmt.Fprintf(os.Stderr, "Failed to lstat '%s': %s", dst, err)
+ os.Exit(1)
+ }
+ } else {
+ if dstInfo.Mode()&os.ModeSymlink != 0 {
+ // Assume that the link's target is correct, i.e. no manual tampering.
+ // E.g. OUT_DIR could have been previously used with a different source tree check-out!
+ return 0
+ } else {
+ if err := os.RemoveAll(dstPath); err != nil {
+ fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", dst, err)
+ os.Exit(1)
+ }
+ }
+ }
+
+ // Create symlink.
+ if err := os.Symlink(srcPath, dstPath); err != nil {
fmt.Fprintf(os.Stderr, "Cannot create symlink at '%s' pointing to '%s': %s", dst, src, err)
os.Exit(1)
}
+ return 1
}
func isDir(path string, fi os.FileInfo) bool {
@@ -253,8 +221,9 @@
defer context.wg.Done()
if instructions != nil && instructions.excluded {
- // This directory is not needed, bail out
- return
+ // Excluded paths are skipped at the level of the non-excluded parent.
+ fmt.Fprintf(os.Stderr, "may not specify a root-level exclude directory '%s'", srcDir)
+ os.Exit(1)
}
// We don't add buildFilesDir here because the bp2build files marker files is
@@ -272,6 +241,12 @@
renamingBuildFile = true
srcDirMap["BUILD.bazel"] = srcDirMap["BUILD"]
delete(srcDirMap, "BUILD")
+ if instructions != nil {
+ if _, ok := instructions.children["BUILD"]; ok {
+ instructions.children["BUILD.bazel"] = instructions.children["BUILD"]
+ delete(instructions.children, "BUILD")
+ }
+ }
}
}
}
@@ -288,17 +263,41 @@
// Tests read the error messages generated, so ensure their order is deterministic
sort.Strings(allEntries)
- err := os.MkdirAll(shared.JoinPath(context.topdir, forestDir), 0777)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Cannot mkdir '%s': %s\n", forestDir, err)
- os.Exit(1)
+ fullForestPath := shared.JoinPath(context.topdir, forestDir)
+ createForestDir := false
+ if fi, err := os.Lstat(fullForestPath); err != nil {
+ if os.IsNotExist(err) {
+ createForestDir = true
+ } else {
+ fmt.Fprintf(os.Stderr, "Could not read info for '%s': %s\n", forestDir, err)
+ }
+ } else if fi.Mode()&os.ModeDir == 0 {
+ if err := os.RemoveAll(fullForestPath); err != nil {
+ fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", forestDir, err)
+ os.Exit(1)
+ }
+ createForestDir = true
}
- context.mkdirCount.Add(1)
+ if createForestDir {
+ if err := os.MkdirAll(fullForestPath, 0777); err != nil {
+ fmt.Fprintf(os.Stderr, "Could not mkdir '%s': %s\n", forestDir, err)
+ os.Exit(1)
+ }
+ context.mkdirCount.Add(1)
+ }
+
+ // Start with a list of items that already exist in the forest, and remove
+ // each element as it is processed in allEntries. Any remaining items in
+ // forestMapForDeletion must be removed. (This handles files which were
+ // removed since the previous forest generation).
+ forestMapForDeletion := readdirToMap(shared.JoinPath(context.topdir, forestDir))
for _, f := range allEntries {
if f[0] == '.' {
continue // Ignore dotfiles
}
+ delete(forestMapForDeletion, f)
+ // todo add deletionCount metric
// The full paths of children in the input trees and in the output tree
forestChild := shared.JoinPath(forestDir, f)
@@ -309,13 +308,9 @@
buildFilesChild := shared.JoinPath(buildFilesDir, f)
// Descend in the instruction tree if it exists
- var instructionsChild *instructionsNode = nil
+ var instructionsChild *instructionsNode
if instructions != nil {
- if f == "BUILD.bazel" && renamingBuildFile {
- instructionsChild = instructions.children["BUILD"]
- } else {
- instructionsChild = instructions.children[f]
- }
+ instructionsChild = instructions.children[f]
}
srcChildEntry, sExists := srcDirMap[f]
@@ -323,8 +318,7 @@
if instructionsChild != nil && instructionsChild.excluded {
if bExists {
- symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
- context.symlinkCount.Add(1)
+ context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
}
continue
}
@@ -340,8 +334,7 @@
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else {
// Not in the source tree, symlink BUILD file
- symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
- context.symlinkCount.Add(1)
+ context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
}
} else if !bExists {
if sDir && instructionsChild != nil {
@@ -351,8 +344,7 @@
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else {
// Not in the build file tree, symlink source tree, carry on
- symlinkIntoForest(context.topdir, forestChild, srcChild)
- context.symlinkCount.Add(1)
+ context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, srcChild))
}
} else if sDir && bDir {
// Both are directories. Descend.
@@ -365,8 +357,7 @@
// The Android.bp file that codegen used to produce `buildFilesChild` is
// already a dependency, we can ignore `buildFilesChild`.
context.depCh <- srcChild
- err = mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose)
- if err != nil {
+ if err := mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose); err != nil {
fmt.Fprintf(os.Stderr, "Error merging %s and %s: %s",
srcBuildFile, generatedBuildFile, err)
os.Exit(1)
@@ -379,51 +370,27 @@
os.Exit(1)
}
}
-}
-func removeParallelRecursive(pool *syscallPool, path string, fi os.FileInfo, wg *sync.WaitGroup) {
- defer wg.Done()
-
- if fi.IsDir() {
- children := readdirToMap(path)
- childrenWg := &sync.WaitGroup{}
- childrenWg.Add(len(children))
-
- for child, childFi := range children {
- go removeParallelRecursive(pool, shared.JoinPath(path, child), childFi, childrenWg)
+ // Remove all files in the forest that exist in neither the source
+ // tree nor the build files tree. (This handles files which were removed
+ // since the previous forest generation).
+ for f := range forestMapForDeletion {
+ var instructionsChild *instructionsNode
+ if instructions != nil {
+ instructionsChild = instructions.children[f]
}
- childrenWg.Wait()
- }
-
- pool.do(func() {
- if err := os.Remove(path); err != nil {
- fmt.Fprintf(os.Stderr, "Cannot unlink '%s': %s\n", path, err)
+ if instructionsChild != nil && instructionsChild.excluded {
+ // This directory may be excluded because bazel writes to it under the
+ // forest root. Thus this path is intentionally left alone.
+ continue
+ }
+ forestChild := shared.JoinPath(context.topdir, forestDir, f)
+ if err := os.RemoveAll(forestChild); err != nil {
+ fmt.Fprintf(os.Stderr, "Failed to remove '%s/%s': %s", forestDir, f, err)
os.Exit(1)
}
- })
-}
-
-func removeParallel(path string) {
- fi, err := os.Lstat(path)
- if err != nil {
- if errors.Is(err, fs.ErrNotExist) {
- return
- }
-
- fmt.Fprintf(os.Stderr, "Cannot lstat '%s': %s\n", path, err)
- os.Exit(1)
}
-
- wg := &sync.WaitGroup{}
- wg.Add(1)
-
- // Random guess as to the best number of syscalls to run in parallel
- pool := createSyscallPool(100)
- removeParallelRecursive(pool, path, fi, wg)
- pool.shutdown()
-
- wg.Wait()
}
// PlantSymlinkForest Creates a symlink forest by merging the directory tree at "buildFiles" and
@@ -439,8 +406,6 @@
symlinkCount: atomic.Uint64{},
}
- removeParallel(shared.JoinPath(topdir, forest))
-
instructions := instructionsFromExcludePathList(exclude)
go func() {
context.wg.Add(1)
diff --git a/cmd/soong_build/main.go b/cmd/soong_build/main.go
index 29a6f95..c005f7c 100644
--- a/cmd/soong_build/main.go
+++ b/cmd/soong_build/main.go
@@ -447,6 +447,7 @@
"bazel-genfiles",
"bazel-out",
"bazel-testlogs",
+ "bazel-workspace",
"bazel-" + filepath.Base(topDir),
}
}
diff --git a/tests/bp2build_bazel_test.sh b/tests/bp2build_bazel_test.sh
index 6477dac..878b4a1 100755
--- a/tests/bp2build_bazel_test.sh
+++ b/tests/bp2build_bazel_test.sh
@@ -140,7 +140,7 @@
# NOTE: We don't actually use the extra BUILD file for anything here
run_bazel build --config=android --config=bp2build --config=ci //foo/...
- local the_answer_file="$(find -L bazel-out -name the_answer.txt)"
+ local -r the_answer_file="$(find -L bazel-out -name the_answer.txt)"
if [[ ! -f "${the_answer_file}" ]]; then
fail "Expected the_answer.txt to be generated, but was missing"
fi
@@ -156,6 +156,49 @@
eval "${_save_trap}"
}
+function test_bp2build_symlinks_files {
+ setup
+ mkdir -p foo
+ touch foo/BLANK1
+ touch foo/BLANK2
+ touch foo/F2D
+ touch foo/BUILD
+
+ run_soong bp2build
+
+ if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
+ fail "./out/soong/workspace/foo/BUILD should be omitted"
+ fi
+ for file in BLANK1 BLANK2 F2D
+ do
+ if [[ ! -L "./out/soong/workspace/foo/$file" ]]; then
+ fail "./out/soong/workspace/foo/$file should exist"
+ fi
+ done
+ local -r BLANK1_BEFORE=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
+
+ rm foo/BLANK2
+ rm foo/F2D
+ mkdir foo/F2D
+ touch foo/F2D/BUILD
+
+ run_soong bp2build
+
+ if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
+ fail "./out/soong/workspace/foo/BUILD should be omitted"
+ fi
+ local -r BLANK1_AFTER=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
+ if [[ "$BLANK1_AFTER" != "$BLANK1_BEFORE" ]]; then
+ fail "./out/soong/workspace/foo/BLANK1 should be untouched"
+ fi
+ if [[ -e "./out/soong/workspace/foo/BLANK2" ]]; then
+ fail "./out/soong/workspace/foo/BLANK2 should be removed"
+ fi
+ if [[ -L "./out/soong/workspace/foo/F2D" ]] || [[ ! -d "./out/soong/workspace/foo/F2D" ]]; then
+ fail "./out/soong/workspace/foo/F2D should be a dir"
+ fi
+}
+
function test_cc_correctness {
setup