blob: 667b952a6f54b2b0df164398b30b7a5c112c9bd8 [file] [log] [blame]
Cole Faust324a92e2022-08-23 15:29:05 -07001// Copyright 2022 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020015package bp2build
16
17import (
Lukacs T. Berkibc5f7312022-10-31 09:01:34 +000018 "errors"
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020019 "fmt"
Lukacs T. Berkibc5f7312022-10-31 09:01:34 +000020 "io/fs"
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020021 "io/ioutil"
22 "os"
23 "path/filepath"
Cole Faust324a92e2022-08-23 15:29:05 -070024 "regexp"
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +000025 "sync"
26 "sync/atomic"
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020027
28 "android/soong/shared"
29)
30
31// A tree structure that describes what to do at each directory in the created
32// symlink tree. Currently it is used to enumerate which files/directories
33// should be excluded from symlinking. Each instance of "node" represents a file
34// or a directory. If excluded is true, then that file/directory should be
35// excluded from symlinking. Otherwise, the node is not excluded, but one of its
36// descendants is (otherwise the node in question would not exist)
Lukacs T. Berkic541cd22022-10-26 07:26:50 +000037
38type instructionsNode struct {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020039 name string
40 excluded bool // If false, this is just an intermediate node
Lukacs T. Berkic541cd22022-10-26 07:26:50 +000041 children map[string]*instructionsNode
42}
43
44type symlinkForestContext struct {
45 verbose bool
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +000046 topdir string // $TOPDIR
47
48 // State
Usta Shresthada15c612022-11-08 14:12:36 -050049 wg sync.WaitGroup
50 depCh chan string
51 mkdirCount atomic.Uint64
52 symlinkCount atomic.Uint64
Lukacs T. Berkib353cca2021-04-16 13:47:36 +020053}
54
Lukacs T. Berkibc5f7312022-10-31 09:01:34 +000055// A simple thread pool to limit concurrency on system calls.
56// Necessary because Go spawns a new OS-level thread for each blocking system
57// call. This means that if syscalls are too slow and there are too many of
58// them, the hard limit on OS-level threads can be exhausted.
59type syscallPool struct {
60 shutdownCh []chan<- struct{}
61 workCh chan syscall
62}
63
64type syscall struct {
65 work func()
66 done chan<- struct{}
67}
68
69func createSyscallPool(count int) *syscallPool {
70 result := &syscallPool{
71 shutdownCh: make([]chan<- struct{}, count),
72 workCh: make(chan syscall),
73 }
74
75 for i := 0; i < count; i++ {
76 shutdownCh := make(chan struct{})
77 result.shutdownCh[i] = shutdownCh
78 go result.worker(shutdownCh)
79 }
80
81 return result
82}
83
84func (p *syscallPool) do(work func()) {
85 doneCh := make(chan struct{})
86 p.workCh <- syscall{work, doneCh}
87 <-doneCh
88}
89
90func (p *syscallPool) shutdown() {
91 for _, ch := range p.shutdownCh {
92 ch <- struct{}{} // Blocks until the value is received
93 }
94}
95
96func (p *syscallPool) worker(shutdownCh <-chan struct{}) {
97 for {
98 select {
99 case <-shutdownCh:
100 return
101 case work := <-p.workCh:
102 work.work()
103 work.done <- struct{}{}
104 }
105 }
106}
107
Usta Shresthadb46a9b2022-07-11 11:29:56 -0400108// Ensures that the node for the given path exists in the tree and returns it.
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000109func ensureNodeExists(root *instructionsNode, path string) *instructionsNode {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200110 if path == "" {
111 return root
112 }
113
114 if path[len(path)-1] == '/' {
115 path = path[:len(path)-1] // filepath.Split() leaves a trailing slash
116 }
117
118 dir, base := filepath.Split(path)
119
120 // First compute the parent node...
121 dn := ensureNodeExists(root, dir)
122
123 // then create the requested node as its direct child, if needed.
124 if child, ok := dn.children[base]; ok {
125 return child
126 } else {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000127 dn.children[base] = &instructionsNode{base, false, make(map[string]*instructionsNode)}
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200128 return dn.children[base]
129 }
130}
131
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000132// Turns a list of paths to be excluded into a tree
133func instructionsFromExcludePathList(paths []string) *instructionsNode {
134 result := &instructionsNode{"", false, make(map[string]*instructionsNode)}
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200135
136 for _, p := range paths {
137 ensureNodeExists(result, p).excluded = true
138 }
139
140 return result
141}
142
Cole Faust324a92e2022-08-23 15:29:05 -0700143func mergeBuildFiles(output string, srcBuildFile string, generatedBuildFile string, verbose bool) error {
144
145 srcBuildFileContent, err := os.ReadFile(srcBuildFile)
146 if err != nil {
147 return err
148 }
149
150 generatedBuildFileContent, err := os.ReadFile(generatedBuildFile)
151 if err != nil {
152 return err
153 }
154
155 // There can't be a package() call in both the source and generated BUILD files.
156 // bp2build will generate a package() call for licensing information, but if
157 // there's no licensing information, it will still generate a package() call
158 // that just sets default_visibility=public. If the handcrafted build file
159 // also has a package() call, we'll allow it to override the bp2build
160 // generated one if it doesn't have any licensing information. If the bp2build
161 // one has licensing information and the handcrafted one exists, we'll leave
162 // them both in for bazel to throw an error.
163 packageRegex := regexp.MustCompile(`(?m)^package\s*\(`)
164 packageDefaultVisibilityRegex := regexp.MustCompile(`(?m)^package\s*\(\s*default_visibility\s*=\s*\[\s*"//visibility:public",?\s*]\s*\)`)
165 if packageRegex.Find(srcBuildFileContent) != nil {
166 if verbose && packageDefaultVisibilityRegex.Find(generatedBuildFileContent) != nil {
167 fmt.Fprintf(os.Stderr, "Both '%s' and '%s' have a package() target, removing the first one\n",
168 generatedBuildFile, srcBuildFile)
169 }
170 generatedBuildFileContent = packageDefaultVisibilityRegex.ReplaceAll(generatedBuildFileContent, []byte{})
171 }
172
173 outFile, err := os.Create(output)
174 if err != nil {
175 return err
176 }
177
178 _, err = outFile.Write(generatedBuildFileContent)
179 if err != nil {
180 return err
181 }
182
183 if generatedBuildFileContent[len(generatedBuildFileContent)-1] != '\n' {
184 _, err = outFile.WriteString("\n")
185 if err != nil {
186 return err
187 }
188 }
189
190 _, err = outFile.Write(srcBuildFileContent)
191 return err
192}
193
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200194// Calls readdir() and returns it as a map from the basename of the files in dir
195// to os.FileInfo.
196func readdirToMap(dir string) map[string]os.FileInfo {
197 entryList, err := ioutil.ReadDir(dir)
198 result := make(map[string]os.FileInfo)
199
200 if err != nil {
201 if os.IsNotExist(err) {
202 // It's okay if a directory doesn't exist; it just means that one of the
203 // trees to be merged contains parts the other doesn't
204 return result
205 } else {
206 fmt.Fprintf(os.Stderr, "Cannot readdir '%s': %s\n", dir, err)
207 os.Exit(1)
208 }
209 }
210
211 for _, fi := range entryList {
212 result[fi.Name()] = fi
213 }
214
215 return result
216}
217
218// Creates a symbolic link at dst pointing to src
219func symlinkIntoForest(topdir, dst, src string) {
220 err := os.Symlink(shared.JoinPath(topdir, src), shared.JoinPath(topdir, dst))
221 if err != nil {
222 fmt.Fprintf(os.Stderr, "Cannot create symlink at '%s' pointing to '%s': %s", dst, src, err)
223 os.Exit(1)
224 }
225}
226
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200227func isDir(path string, fi os.FileInfo) bool {
228 if (fi.Mode() & os.ModeSymlink) != os.ModeSymlink {
229 return fi.IsDir()
230 }
231
Jingwen Chend4b1dc82022-05-12 11:08:03 +0000232 fi2, statErr := os.Stat(path)
233 if statErr == nil {
234 return fi2.IsDir()
235 }
236
237 // Check if this is a dangling symlink. If so, treat it like a file, not a dir.
238 _, lstatErr := os.Lstat(path)
239 if lstatErr != nil {
240 fmt.Fprintf(os.Stderr, "Cannot stat or lstat '%s': %s\n%s\n", path, statErr, lstatErr)
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200241 os.Exit(1)
242 }
243
Jingwen Chend4b1dc82022-05-12 11:08:03 +0000244 return false
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200245}
246
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200247// Recursively plants a symlink forest at forestDir. The symlink tree will
248// contain every file in buildFilesDir and srcDir excluding the files in
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000249// instructions. Collects every directory encountered during the traversal of
250// srcDir .
251func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *instructionsNode, forestDir string, buildFilesDir string, srcDir string) {
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000252 defer context.wg.Done()
253
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000254 if instructions != nil && instructions.excluded {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200255 // This directory is not needed, bail out
256 return
257 }
258
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000259 // We don't add buildFilesDir here because the bp2build files marker files is
260 // already a dependency which covers it. If we ever wanted to turn this into
261 // a generic symlink forest creation tool, we'd need to add it, too.
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000262 context.depCh <- srcDir
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000263
264 srcDirMap := readdirToMap(shared.JoinPath(context.topdir, srcDir))
265 buildFilesMap := readdirToMap(shared.JoinPath(context.topdir, buildFilesDir))
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200266
Cole Faust324a92e2022-08-23 15:29:05 -0700267 renamingBuildFile := false
268 if _, ok := srcDirMap["BUILD"]; ok {
269 if _, ok := srcDirMap["BUILD.bazel"]; !ok {
270 if _, ok := buildFilesMap["BUILD.bazel"]; ok {
271 renamingBuildFile = true
272 srcDirMap["BUILD.bazel"] = srcDirMap["BUILD"]
273 delete(srcDirMap, "BUILD")
274 }
275 }
276 }
277
Usta Shrestha49d04e82022-10-17 17:36:49 -0400278 allEntries := make(map[string]struct{})
Usta Shresthadb46a9b2022-07-11 11:29:56 -0400279 for n := range srcDirMap {
Usta Shrestha49d04e82022-10-17 17:36:49 -0400280 allEntries[n] = struct{}{}
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200281 }
282
Usta Shresthadb46a9b2022-07-11 11:29:56 -0400283 for n := range buildFilesMap {
Usta Shrestha49d04e82022-10-17 17:36:49 -0400284 allEntries[n] = struct{}{}
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200285 }
286
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000287 err := os.MkdirAll(shared.JoinPath(context.topdir, forestDir), 0777)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200288 if err != nil {
289 fmt.Fprintf(os.Stderr, "Cannot mkdir '%s': %s\n", forestDir, err)
290 os.Exit(1)
291 }
Usta Shresthada15c612022-11-08 14:12:36 -0500292 context.mkdirCount.Add(1)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200293
Usta Shresthadb46a9b2022-07-11 11:29:56 -0400294 for f := range allEntries {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200295 if f[0] == '.' {
296 continue // Ignore dotfiles
297 }
298
299 // The full paths of children in the input trees and in the output tree
Lukacs T. Berki3f9416e2021-04-20 13:08:11 +0200300 forestChild := shared.JoinPath(forestDir, f)
301 srcChild := shared.JoinPath(srcDir, f)
Cole Faust324a92e2022-08-23 15:29:05 -0700302 if f == "BUILD.bazel" && renamingBuildFile {
303 srcChild = shared.JoinPath(srcDir, "BUILD")
304 }
Lukacs T. Berki3f9416e2021-04-20 13:08:11 +0200305 buildFilesChild := shared.JoinPath(buildFilesDir, f)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200306
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000307 // Descend in the instruction tree if it exists
308 var instructionsChild *instructionsNode = nil
309 if instructions != nil {
Cole Faust324a92e2022-08-23 15:29:05 -0700310 if f == "BUILD.bazel" && renamingBuildFile {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000311 instructionsChild = instructions.children["BUILD"]
Cole Faust324a92e2022-08-23 15:29:05 -0700312 } else {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000313 instructionsChild = instructions.children[f]
Cole Faust324a92e2022-08-23 15:29:05 -0700314 }
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200315 }
316
Lukacs T. Berki3f9416e2021-04-20 13:08:11 +0200317 srcChildEntry, sExists := srcDirMap[f]
318 buildFilesChildEntry, bExists := buildFilesMap[f]
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200319
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000320 if instructionsChild != nil && instructionsChild.excluded {
Cole Faust324a92e2022-08-23 15:29:05 -0700321 if bExists {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000322 symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
Usta Shresthada15c612022-11-08 14:12:36 -0500323 context.symlinkCount.Add(1)
Cole Faust324a92e2022-08-23 15:29:05 -0700324 }
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200325 continue
326 }
327
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000328 sDir := sExists && isDir(shared.JoinPath(context.topdir, srcChild), srcChildEntry)
329 bDir := bExists && isDir(shared.JoinPath(context.topdir, buildFilesChild), buildFilesChildEntry)
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200330
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200331 if !sExists {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000332 if bDir && instructionsChild != nil {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200333 // Not in the source tree, but we have to exclude something from under
334 // this subtree, so descend
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000335 context.wg.Add(1)
336 go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200337 } else {
338 // Not in the source tree, symlink BUILD file
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000339 symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
Usta Shresthada15c612022-11-08 14:12:36 -0500340 context.symlinkCount.Add(1)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200341 }
342 } else if !bExists {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000343 if sDir && instructionsChild != nil {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200344 // Not in the build file tree, but we have to exclude something from
345 // under this subtree, so descend
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000346 context.wg.Add(1)
347 go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200348 } else {
349 // Not in the build file tree, symlink source tree, carry on
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000350 symlinkIntoForest(context.topdir, forestChild, srcChild)
Usta Shresthada15c612022-11-08 14:12:36 -0500351 context.symlinkCount.Add(1)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200352 }
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200353 } else if sDir && bDir {
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200354 // Both are directories. Descend.
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000355 context.wg.Add(1)
356 go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
Lukacs T. Berkie3487c82022-05-02 10:13:19 +0200357 } else if !sDir && !bDir {
Cole Faust324a92e2022-08-23 15:29:05 -0700358 // Neither is a directory. Merge them.
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000359 srcBuildFile := shared.JoinPath(context.topdir, srcChild)
360 generatedBuildFile := shared.JoinPath(context.topdir, buildFilesChild)
Usta Shrestha783ec5c2022-10-25 01:22:36 -0400361 // The Android.bp file that codegen used to produce `buildFilesChild` is
362 // already a dependency, we can ignore `buildFilesChild`.
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000363 context.depCh <- srcChild
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000364 err = mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose)
Cole Faust324a92e2022-08-23 15:29:05 -0700365 if err != nil {
366 fmt.Fprintf(os.Stderr, "Error merging %s and %s: %s",
367 srcBuildFile, generatedBuildFile, err)
Usta Shrestha071f6c22022-10-25 12:34:06 -0400368 os.Exit(1)
Sasha Smundak0fd93e02022-05-19 19:34:31 -0700369 }
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200370 } else {
371 // Both exist and one is a file. This is an error.
372 fmt.Fprintf(os.Stderr,
Lukacs T. Berkib21166e2021-04-21 11:58:36 +0200373 "Conflict in workspace symlink tree creation: both '%s' and '%s' exist and exactly one is a directory\n",
Lukacs T. Berki3f9416e2021-04-20 13:08:11 +0200374 srcChild, buildFilesChild)
Usta Shrestha071f6c22022-10-25 12:34:06 -0400375 os.Exit(1)
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200376 }
377 }
378}
379
Lukacs T. Berkibc5f7312022-10-31 09:01:34 +0000380func removeParallelRecursive(pool *syscallPool, path string, fi os.FileInfo, wg *sync.WaitGroup) {
381 defer wg.Done()
382
383 if fi.IsDir() {
384 children := readdirToMap(path)
385 childrenWg := &sync.WaitGroup{}
386 childrenWg.Add(len(children))
387
388 for child, childFi := range children {
389 go removeParallelRecursive(pool, shared.JoinPath(path, child), childFi, childrenWg)
390 }
391
392 childrenWg.Wait()
393 }
394
395 pool.do(func() {
396 if err := os.Remove(path); err != nil {
397 fmt.Fprintf(os.Stderr, "Cannot unlink '%s': %s\n", path, err)
398 os.Exit(1)
399 }
400 })
401}
402
403func removeParallel(path string) {
404 fi, err := os.Lstat(path)
405 if err != nil {
406 if errors.Is(err, fs.ErrNotExist) {
407 return
408 }
409
410 fmt.Fprintf(os.Stderr, "Cannot lstat '%s': %s\n", path, err)
411 os.Exit(1)
412 }
413
414 wg := &sync.WaitGroup{}
415 wg.Add(1)
416
417 // Random guess as to the best number of syscalls to run in parallel
418 pool := createSyscallPool(100)
419 removeParallelRecursive(pool, path, fi, wg)
420 pool.shutdown()
421
422 wg.Wait()
423}
424
Usta Shresthada15c612022-11-08 14:12:36 -0500425// PlantSymlinkForest Creates a symlink forest by merging the directory tree at "buildFiles" and
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200426// "srcDir" while excluding paths listed in "exclude". Returns the set of paths
427// under srcDir on which readdir() had to be called to produce the symlink
428// forest.
Usta Shresthada15c612022-11-08 14:12:36 -0500429func PlantSymlinkForest(verbose bool, topdir string, forest string, buildFiles string, exclude []string) (deps []string, mkdirCount, symlinkCount uint64) {
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000430 context := &symlinkForestContext{
Usta Shresthada15c612022-11-08 14:12:36 -0500431 verbose: verbose,
432 topdir: topdir,
433 depCh: make(chan string),
434 mkdirCount: atomic.Uint64{},
435 symlinkCount: atomic.Uint64{},
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000436 }
437
Lukacs T. Berkibc5f7312022-10-31 09:01:34 +0000438 removeParallel(shared.JoinPath(topdir, forest))
Lukacs T. Berkic541cd22022-10-26 07:26:50 +0000439
440 instructions := instructionsFromExcludePathList(exclude)
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000441 go func() {
442 context.wg.Add(1)
443 plantSymlinkForestRecursive(context, instructions, forest, buildFiles, ".")
444 context.wg.Wait()
445 close(context.depCh)
446 }()
447
Lukacs T. Berki647e7ab2022-10-27 09:55:38 +0000448 for dep := range context.depCh {
449 deps = append(deps, dep)
450 }
451
Usta Shresthada15c612022-11-08 14:12:36 -0500452 return deps, context.mkdirCount.Load(), context.symlinkCount.Load()
Lukacs T. Berkib353cca2021-04-16 13:47:36 +0200453}