Make globs compatible with hash-based ninja semantics

Previously, globs worked by having soong_build rewrite a ninja file
that ran the globs, and then dependended on the results of that ninja
file. soong_build also pre-filled their outputs so that it wouldn't
be immediately rerun on the 2nd build.

However, the pre-filling of outputs worked for ninja, because although
it updated their timestamps, the soong ninja file was then touched
by soong_build after that, so the soong_build ninja file was newer
and ninja wouldn't rerun soong. But N2 reruns actions if their inputs'
mtimes change in any way, not just if they're newer. Similarly,
hashed-based ninja implementations could not enforce an order on
file contents, so they would have the same problem.

To fix this, lift the glob checking out of ninja and into soong_ui.
Soong_build will output a globs report file every time it's run, and
every time soong_ui is run it will check the globs file, and if any
globs change, update an input to soong_build. soong_ui is essentially
doing what was done in ninja with bpglob actions before.

Bug: 364749114
Test: m nothing, m nothing again doesn't reanalyze, create a new file under a glob directory, m nothing again reanalyzes
Change-Id: I0dbc5ec58c89b869b59cd0602b82215c4972d799
diff --git a/cmd/soong_build/main.go b/cmd/soong_build/main.go
index a8be7ec..24a44b4 100644
--- a/cmd/soong_build/main.go
+++ b/cmd/soong_build/main.go
@@ -15,7 +15,6 @@
 package main
 
 import (
-	"bytes"
 	"encoding/json"
 	"errors"
 	"flag"
@@ -29,10 +28,12 @@
 	"android/soong/android/allowlists"
 	"android/soong/bp2build"
 	"android/soong/shared"
+
 	"github.com/google/blueprint"
 	"github.com/google/blueprint/bootstrap"
 	"github.com/google/blueprint/deptools"
 	"github.com/google/blueprint/metrics"
+	"github.com/google/blueprint/pathtools"
 	"github.com/google/blueprint/proptools"
 	androidProtobuf "google.golang.org/protobuf/android"
 )
@@ -42,8 +43,6 @@
 	availableEnvFile string
 	usedEnvFile      string
 
-	globFile    string
-	globListDir string
 	delveListen string
 	delvePath   string
 
@@ -64,8 +63,6 @@
 	flag.StringVar(&cmdlineArgs.SoongOutDir, "soong_out", "", "Soong output directory (usually $TOP/out/soong)")
 	flag.StringVar(&availableEnvFile, "available_env", "", "File containing available environment variables")
 	flag.StringVar(&usedEnvFile, "used_env", "", "File containing used environment variables")
-	flag.StringVar(&globFile, "globFile", "build-globs.ninja", "the Ninja file of globs to output")
-	flag.StringVar(&globListDir, "globListDir", "", "the directory containing the glob list files")
 	flag.StringVar(&cmdlineArgs.OutDir, "out", "", "the ninja builddir directory")
 	flag.StringVar(&cmdlineArgs.ModuleListFile, "l", "", "file that lists filepaths to parse")
 
@@ -206,20 +203,6 @@
 	ctx.Context.PrintJSONGraphAndActions(graphFile, actionsFile)
 }
 
-func writeBuildGlobsNinjaFile(ctx *android.Context) {
-	ctx.EventHandler.Begin("globs_ninja_file")
-	defer ctx.EventHandler.End("globs_ninja_file")
-
-	globDir := bootstrap.GlobDirectory(ctx.Config().SoongOutDir(), globListDir)
-	err := bootstrap.WriteBuildGlobsNinjaFile(&bootstrap.GlobSingleton{
-		GlobLister: ctx.Globs,
-		GlobFile:   globFile,
-		GlobDir:    globDir,
-		SrcDir:     ctx.SrcDir(),
-	}, ctx.Config())
-	maybeQuit(err, "")
-}
-
 func writeDepFile(outputFile string, eventHandler *metrics.EventHandler, ninjaDeps []string) {
 	eventHandler.Begin("ninja_deps")
 	defer eventHandler.End("ninja_deps")
@@ -283,7 +266,9 @@
 }
 
 // runSoongOnlyBuild runs the standard Soong build in a number of different modes.
-func runSoongOnlyBuild(ctx *android.Context, extraNinjaDeps []string) string {
+// It returns the path to the output file (usually the ninja file) and the deps that need
+// to trigger a soong rerun.
+func runSoongOnlyBuild(ctx *android.Context) (string, []string) {
 	ctx.EventHandler.Begin("soong_build")
 	defer ctx.EventHandler.End("soong_build")
 
@@ -299,37 +284,30 @@
 
 	ninjaDeps, err := bootstrap.RunBlueprint(cmdlineArgs.Args, stopBefore, ctx.Context, ctx.Config())
 	maybeQuit(err, "")
-	ninjaDeps = append(ninjaDeps, extraNinjaDeps...)
-
-	writeBuildGlobsNinjaFile(ctx)
 
 	// Convert the Soong module graph into Bazel BUILD files.
 	switch ctx.Config().BuildMode {
 	case android.GenerateQueryView:
 		queryviewMarkerFile := cmdlineArgs.BazelQueryViewDir + ".marker"
 		runQueryView(cmdlineArgs.BazelQueryViewDir, queryviewMarkerFile, ctx)
-		writeDepFile(queryviewMarkerFile, ctx.EventHandler, ninjaDeps)
-		return queryviewMarkerFile
+		return queryviewMarkerFile, ninjaDeps
 	case android.GenerateModuleGraph:
 		writeJsonModuleGraphAndActions(ctx, cmdlineArgs)
-		writeDepFile(cmdlineArgs.ModuleGraphFile, ctx.EventHandler, ninjaDeps)
-		return cmdlineArgs.ModuleGraphFile
+		return cmdlineArgs.ModuleGraphFile, ninjaDeps
 	case android.GenerateDocFile:
 		// TODO: we could make writeDocs() return the list of documentation files
 		// written and add them to the .d file. Then soong_docs would be re-run
 		// whenever one is deleted.
 		err := writeDocs(ctx, shared.JoinPath(topDir, cmdlineArgs.DocFile))
 		maybeQuit(err, "error building Soong documentation")
-		writeDepFile(cmdlineArgs.DocFile, ctx.EventHandler, ninjaDeps)
-		return cmdlineArgs.DocFile
+		return cmdlineArgs.DocFile, ninjaDeps
 	default:
 		// The actual output (build.ninja) was written in the RunBlueprint() call
 		// above
-		writeDepFile(cmdlineArgs.OutFile, ctx.EventHandler, ninjaDeps)
 		if needToWriteNinjaHint(ctx) {
 			writeNinjaHint(ctx)
 		}
-		return cmdlineArgs.OutFile
+		return cmdlineArgs.OutFile, ninjaDeps
 	}
 }
 
@@ -359,6 +337,8 @@
 func main() {
 	flag.Parse()
 
+	soongStartTime := time.Now()
+
 	shared.ReexecWithDelveMaybe(delveListen, delvePath)
 	android.InitSandbox(topDir)
 
@@ -369,13 +349,6 @@
 		configuration.SetAllowMissingDependencies()
 	}
 
-	extraNinjaDeps := []string{configuration.ProductVariablesFileName, usedEnvFile}
-	if shared.IsDebugging() {
-		// Add a non-existent file to the dependencies so that soong_build will rerun when the debugger is
-		// enabled even if it completed successfully.
-		extraNinjaDeps = append(extraNinjaDeps, filepath.Join(configuration.SoongOutDir(), "always_rerun_for_delve"))
-	}
-
 	// Bypass configuration.Getenv, as LOG_DIR does not need to be dependency tracked. By definition, it will
 	// change between every CI build, so tracking it would require re-running Soong for every build.
 	metricsDir := availableEnv["LOG_DIR"]
@@ -393,7 +366,16 @@
 	ctx.SetIncrementalAnalysis(incremental)
 
 	ctx.Register()
-	finalOutputFile := runSoongOnlyBuild(ctx, extraNinjaDeps)
+	finalOutputFile, ninjaDeps := runSoongOnlyBuild(ctx)
+
+	ninjaDeps = append(ninjaDeps, usedEnvFile)
+	if shared.IsDebugging() {
+		// Add a non-existent file to the dependencies so that soong_build will rerun when the debugger is
+		// enabled even if it completed successfully.
+		ninjaDeps = append(ninjaDeps, filepath.Join(configuration.SoongOutDir(), "always_rerun_for_delve"))
+	}
+
+	writeDepFile(finalOutputFile, ctx.EventHandler, ninjaDeps)
 
 	if ctx.GetIncrementalEnabled() {
 		data, err := shared.EnvFileContents(configuration.EnvDeps())
@@ -407,6 +389,9 @@
 
 	writeUsedEnvironmentFile(configuration)
 
+	err = writeGlobFile(ctx.EventHandler, finalOutputFile, ctx.Globs(), soongStartTime)
+	maybeQuit(err, "")
+
 	// Touch the output file so that it's the newest file created by soong_build.
 	// This is necessary because, if soong_build generated any files which
 	// are ninja inputs to the main output file, then ninja would superfluously
@@ -423,18 +408,33 @@
 	data, err := shared.EnvFileContents(configuration.EnvDeps())
 	maybeQuit(err, "error writing used environment file '%s'\n", usedEnvFile)
 
-	if preexistingData, err := os.ReadFile(path); err != nil {
-		if !os.IsNotExist(err) {
-			maybeQuit(err, "error reading used environment file '%s'", usedEnvFile)
-		}
-	} else if bytes.Equal(preexistingData, data) {
-		// used environment file is unchanged
-		return
-	}
-	err = os.WriteFile(path, data, 0666)
+	err = pathtools.WriteFileIfChanged(path, data, 0666)
 	maybeQuit(err, "error writing used environment file '%s'", usedEnvFile)
 }
 
+func writeGlobFile(eventHandler *metrics.EventHandler, finalOutFile string, globs pathtools.MultipleGlobResults, soongStartTime time.Time) error {
+	eventHandler.Begin("writeGlobFile")
+	defer eventHandler.End("writeGlobFile")
+
+	globsFile, err := os.Create(shared.JoinPath(topDir, finalOutFile+".globs"))
+	if err != nil {
+		return err
+	}
+	defer globsFile.Close()
+	globsFileEncoder := json.NewEncoder(globsFile)
+	for _, glob := range globs {
+		if err := globsFileEncoder.Encode(glob); err != nil {
+			return err
+		}
+	}
+
+	return os.WriteFile(
+		shared.JoinPath(topDir, finalOutFile+".globs_time"),
+		[]byte(fmt.Sprintf("%d\n", soongStartTime.UnixMicro())),
+		0666,
+	)
+}
+
 func touch(path string) {
 	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
 	maybeQuit(err, "Error touching '%s'", path)
diff --git a/tests/bootstrap_test.sh b/tests/bootstrap_test.sh
index 2e40950..715f976 100755
--- a/tests/bootstrap_test.sh
+++ b/tests/bootstrap_test.sh
@@ -145,36 +145,19 @@
   run_soong
   local -r ninja_mtime1=$(stat -c "%y" out/soong/build."${target_product}".ninja)
 
-  local glob_deps_file=out/soong/globs/"${target_product}"/0.d
-
   run_soong
   local -r ninja_mtime2=$(stat -c "%y" out/soong/build."${target_product}".ninja)
 
-  # There is an ineffiencency in glob that requires bpglob to rerun once for each glob to update
-  # the entry in the .ninja_log.  It doesn't update the output file, but we can detect the rerun
-  # by checking if the deps file was created.
-  if [ ! -e "$glob_deps_file" ]; then
-    fail "Glob deps file missing after second build"
-  fi
-
-  local -r glob_deps_mtime2=$(stat -c "%y" "$glob_deps_file")
-
   if [[ "$ninja_mtime1" != "$ninja_mtime2" ]]; then
     fail "Ninja file rewritten on null incremental build"
   fi
 
   run_soong
   local -r ninja_mtime3=$(stat -c "%y" out/soong/build."${target_product}".ninja)
-  local -r glob_deps_mtime3=$(stat -c "%y" "$glob_deps_file")
 
   if [[ "$ninja_mtime2" != "$ninja_mtime3" ]]; then
     fail "Ninja file rewritten on null incremental build"
   fi
-
-  # The bpglob commands should not rerun after the first incremental build.
-  if [[ "$glob_deps_mtime2" != "$glob_deps_mtime3" ]]; then
-    fail "Glob deps file rewritten on second null incremental build"
-  fi
 }
 
 function test_add_file_to_glob() {
diff --git a/ui/build/config.go b/ui/build/config.go
index 08e1957..f02222e 100644
--- a/ui/build/config.go
+++ b/ui/build/config.go
@@ -1037,10 +1037,6 @@
 	}
 }
 
-func (c *configImpl) NamedGlobFile(name string) string {
-	return shared.JoinPath(c.SoongOutDir(), "globs-"+name+".ninja")
-}
-
 func (c *configImpl) UsedEnvFile(tag string) string {
 	if v, ok := c.environ.Get("TARGET_PRODUCT"); ok {
 		return shared.JoinPath(c.SoongOutDir(), usedEnvFile+"."+v+c.CoverageSuffix()+"."+tag)
diff --git a/ui/build/soong.go b/ui/build/soong.go
index b94ffa5..76a3e35 100644
--- a/ui/build/soong.go
+++ b/ui/build/soong.go
@@ -15,10 +15,14 @@
 package build
 
 import (
+	"encoding/json"
+	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"path/filepath"
+	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -52,7 +56,7 @@
 
 	// bootstrapEpoch is used to determine if an incremental build is incompatible with the current
 	// version of bootstrap and needs cleaning before continuing the build.  Increment this for
-	// incompatible changes, for example when moving the location of the bpglob binary that is
+	// incompatible changes, for example when moving the location of a microfactory binary that is
 	// executed during bootstrap before the primary builder has had a chance to update the path.
 	bootstrapEpoch = 1
 )
@@ -226,10 +230,6 @@
 
 	var allArgs []string
 	allArgs = append(allArgs, pb.specificArgs...)
-	globPathName := getGlobPathNameFromPrimaryBuilderFactory(config, pb)
-	allArgs = append(allArgs,
-		"--globListDir", globPathName,
-		"--globFile", pb.config.NamedGlobFile(globPathName))
 
 	allArgs = append(allArgs, commonArgs...)
 	allArgs = append(allArgs, environmentArgs(pb.config, pb.name)...)
@@ -241,10 +241,8 @@
 	}
 	allArgs = append(allArgs, "Android.bp")
 
-	globfiles := bootstrap.GlobFileListFiles(bootstrap.GlobDirectory(config.SoongOutDir(), globPathName))
-
 	return bootstrap.PrimaryBuilderInvocation{
-		Implicits:   globfiles,
+		Implicits:   []string{pb.output + ".glob_results"},
 		Outputs:     []string{pb.output},
 		Args:        allArgs,
 		Description: pb.description,
@@ -276,24 +274,15 @@
 				os.Remove(file)
 			}
 		}
-		for _, globFile := range bootstrapGlobFileList(config) {
-			os.Remove(globFile)
-		}
+		os.Remove(soongNinjaFile + ".globs")
+		os.Remove(soongNinjaFile + ".globs_time")
+		os.Remove(soongNinjaFile + ".glob_results")
 
 		// Mark the tree as up to date with the current epoch by writing the epoch marker file.
 		writeEmptyFile(ctx, epochPath)
 	}
 }
 
-func bootstrapGlobFileList(config Config) []string {
-	return []string{
-		config.NamedGlobFile(getGlobPathName(config)),
-		config.NamedGlobFile(jsonModuleGraphTag),
-		config.NamedGlobFile(queryviewTag),
-		config.NamedGlobFile(soongDocsTag),
-	}
-}
-
 func bootstrapBlueprint(ctx Context, config Config) {
 	ctx.BeginTrace(metrics.RunSoong, "blueprint bootstrap")
 	defer ctx.EndTrace()
@@ -411,32 +400,9 @@
 		runGoTests:  !config.skipSoongTests,
 		// If we want to debug soong_build, we need to compile it for debugging
 		debugCompilation:          delvePort != "",
-		subninjas:                 bootstrapGlobFileList(config),
 		primaryBuilderInvocations: invocations,
 	}
 
-	// The glob ninja files are generated during the main build phase. However, the
-	// primary buildifer invocation depends on all of its glob files, even before
-	// it's been run. Generate a "empty" glob ninja file on the first run,
-	// so that the files can be there to satisfy the dependency.
-	for _, pb := range pbfs {
-		globPathName := getGlobPathNameFromPrimaryBuilderFactory(config, pb)
-		globNinjaFile := config.NamedGlobFile(globPathName)
-		if _, err := os.Stat(globNinjaFile); os.IsNotExist(err) {
-			err := bootstrap.WriteBuildGlobsNinjaFile(&bootstrap.GlobSingleton{
-				GlobLister: func() pathtools.MultipleGlobResults { return nil },
-				GlobFile:   globNinjaFile,
-				GlobDir:    bootstrap.GlobDirectory(config.SoongOutDir(), globPathName),
-				SrcDir:     ".",
-			}, blueprintConfig)
-			if err != nil {
-				ctx.Fatal(err)
-			}
-		} else if err != nil {
-			ctx.Fatal(err)
-		}
-	}
-
 	// since `bootstrap.ninja` is regenerated unconditionally, we ignore the deps, i.e. little
 	// reason to write a `bootstrap.ninja.d` file
 	_, err := bootstrap.RunBlueprint(blueprintArgs, bootstrap.DoEverything, blueprintCtx, blueprintConfig)
@@ -614,9 +580,6 @@
 		}
 	}()
 
-	runMicrofactory(ctx, config, "bpglob", "github.com/google/blueprint/bootstrap/bpglob",
-		map[string]string{"github.com/google/blueprint": "build/blueprint"})
-
 	ninja := func(targets ...string) {
 		ctx.BeginTrace(metrics.RunSoong, "bootstrap")
 		defer ctx.EndTrace()
@@ -698,6 +661,12 @@
 		targets = append(targets, config.SoongNinjaFile())
 	}
 
+	for _, target := range targets {
+		if err := checkGlobs(ctx, target); err != nil {
+			ctx.Fatalf("Error checking globs: %s", err.Error())
+		}
+	}
+
 	beforeSoongTimestamp := time.Now()
 
 	ninja(targets...)
@@ -724,6 +693,157 @@
 	}
 }
 
+// checkGlobs manages the globs that cause soong to rerun.
+//
+// When soong_build runs, it will run globs. It will write all the globs
+// it ran into the "{finalOutFile}.globs" file. Then every build,
+// soong_ui will check that file, rerun the globs, and if they changed
+// from the results that soong_build got, update the ".glob_results"
+// file, causing soong_build to rerun. The ".glob_results" file will
+// be empty on the first run of soong_build, because we don't know
+// what the globs are yet, but also remain empty until the globs change
+// so that we don't run soong_build a second time unnecessarily.
+// Both soong_build and soong_ui will also update a ".globs_time" file
+// with the time that they ran at every build. When soong_ui checks
+// globs, it only reruns globs whose dependencies are newer than the
+// time in the ".globs_time" file.
+func checkGlobs(ctx Context, finalOutFile string) error {
+	ctx.BeginTrace(metrics.RunSoong, "check_globs")
+	defer ctx.EndTrace()
+	st := ctx.Status.StartTool()
+	st.Status("Running globs...")
+	defer st.Finish()
+
+	globsFile, err := os.Open(finalOutFile + ".globs")
+	if errors.Is(err, fs.ErrNotExist) {
+		// if the glob file doesn't exist, make sure the glob_results file exists and is empty.
+		if err := os.MkdirAll(filepath.Dir(finalOutFile), 0777); err != nil {
+			return err
+		}
+		f, err := os.Create(finalOutFile + ".glob_results")
+		if err != nil {
+			return err
+		}
+		return f.Close()
+	} else if err != nil {
+		return err
+	}
+	defer globsFile.Close()
+	globsFileDecoder := json.NewDecoder(globsFile)
+
+	globsTimeBytes, err := os.ReadFile(finalOutFile + ".globs_time")
+	if err != nil {
+		return err
+	}
+	globsTimeMicros, err := strconv.ParseInt(strings.TrimSpace(string(globsTimeBytes)), 10, 64)
+	if err != nil {
+		return err
+	}
+	globCheckStartTime := time.Now().UnixMicro()
+
+	globsChan := make(chan pathtools.GlobResult)
+	errorsChan := make(chan error)
+	wg := sync.WaitGroup{}
+	hasChangedGlobs := false
+	for i := 0; i < runtime.NumCPU()*2; i++ {
+		wg.Add(1)
+		go func() {
+			for cachedGlob := range globsChan {
+				// If we've already determined we have changed globs, just finish consuming
+				// the channel without doing any more checks.
+				if hasChangedGlobs {
+					continue
+				}
+				// First, check if any of the deps are newer than the last time globs were checked.
+				// If not, we don't need to rerun the glob.
+				hasNewDep := false
+				for _, dep := range cachedGlob.Deps {
+					info, err := os.Stat(dep)
+					if err != nil {
+						errorsChan <- err
+						continue
+					}
+					if info.ModTime().UnixMicro() > globsTimeMicros {
+						hasNewDep = true
+						break
+					}
+				}
+				if !hasNewDep {
+					continue
+				}
+
+				// Then rerun the glob and check if we got the same result as before.
+				result, err := pathtools.Glob(cachedGlob.Pattern, cachedGlob.Excludes, pathtools.FollowSymlinks)
+				if err != nil {
+					errorsChan <- err
+				} else {
+					if !slices.Equal(result.Matches, cachedGlob.Matches) {
+						hasChangedGlobs = true
+					}
+				}
+			}
+			wg.Done()
+		}()
+	}
+	go func() {
+		wg.Wait()
+		close(errorsChan)
+	}()
+
+	errorsWg := sync.WaitGroup{}
+	errorsWg.Add(1)
+	var errFromGoRoutines error
+	go func() {
+		for result := range errorsChan {
+			if errFromGoRoutines == nil {
+				errFromGoRoutines = result
+			}
+		}
+		errorsWg.Done()
+	}()
+
+	var cachedGlob pathtools.GlobResult
+	for globsFileDecoder.More() {
+		if err := globsFileDecoder.Decode(&cachedGlob); err != nil {
+			return err
+		}
+		// Need to clone the GlobResult because the json decoder will
+		// reuse the same slice allocations.
+		globsChan <- cachedGlob.Clone()
+	}
+	close(globsChan)
+	errorsWg.Wait()
+	if errFromGoRoutines != nil {
+		return errFromGoRoutines
+	}
+
+	// Update the globs_time file whether or not we found changed globs,
+	// so that we don't rerun globs in the future that we just saw didn't change.
+	err = os.WriteFile(
+		finalOutFile+".globs_time",
+		[]byte(fmt.Sprintf("%d\n", globCheckStartTime)),
+		0666,
+	)
+	if err != nil {
+		return err
+	}
+
+	if hasChangedGlobs {
+		fmt.Fprintf(os.Stdout, "Globs changed, rerunning soong...\n")
+		// Write the current time to the glob_results file. We just need
+		// some unique value to trigger a rerun, it doesn't matter what it is.
+		err = os.WriteFile(
+			finalOutFile+".glob_results",
+			[]byte(fmt.Sprintf("%d\n", globCheckStartTime)),
+			0666,
+		)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // loadSoongBuildMetrics reads out/soong_build_metrics.pb if it was generated by soong_build and copies the
 // events stored in it into the soong_ui trace to provide introspection into how long the different phases of
 // soong_build are taking.
diff --git a/ui/build/test_build.go b/ui/build/test_build.go
index 3faa94d..ba53119 100644
--- a/ui/build/test_build.go
+++ b/ui/build/test_build.go
@@ -79,9 +79,6 @@
 	// out/build_date.txt is considered a "source file"
 	buildDatetimeFilePath := filepath.Join(outDir, "build_date.txt")
 
-	// bpglob is built explicitly using Microfactory
-	bpglob := filepath.Join(config.SoongOutDir(), "bpglob")
-
 	// release-config files are generated from the initial lunch or Kati phase
 	// before running soong and ninja.
 	releaseConfigDir := filepath.Join(outDir, "soong", "release-config")
@@ -105,7 +102,6 @@
 			line == extraVariablesFilePath ||
 			line == dexpreoptConfigFilePath ||
 			line == buildDatetimeFilePath ||
-			line == bpglob ||
 			strings.HasPrefix(line, releaseConfigDir) ||
 			buildFingerPrintFilePattern.MatchString(line) {
 			// Leaf node is in one of Soong's bootstrap directories, which do not have