blob: 2853a70bc4c87affe0f01f86965ccae5ece2618a [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050020 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050021 "fmt"
22 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "reflect"
Wei Li455ba832021-11-04 22:58:12 +000024 "regexp"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040025 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050026 "strings"
27
28 "github.com/google/blueprint/proptools"
29)
30
// artifactId identifies an artifact entry of an aquery response.
type artifactId int

// depsetId identifies a depSetOfFiles entry of an aquery response.
type depsetId int

// pathFragmentId identifies a pathFragment entry of an aquery response.
type pathFragmentId int
34
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
type artifact struct {
	// Id is the identifier by which depsets and actions refer to this artifact.
	Id artifactId
	// PathFragmentId names the path fragment that is expanded into the
	// artifact's full path.
	PathFragmentId pathFragmentId
}
41
// pathFragment is one entry of the aquery response's path fragment table.
// Fragments are indexed by Id and expanded into full artifact paths by
// expandPathFragment.
type pathFragment struct {
	// Id is the identifier of this fragment.
	Id pathFragmentId
	// Label is presumably the name of this single path component -- confirm
	// against expandPathFragment.
	Label string
	// ParentId presumably refers to the fragment of the enclosing directory --
	// confirm against expandPathFragment.
	ParentId pathFragmentId
}
47
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
// It is used for both action environment variables and template substitutions.
type KeyValuePair struct {
	Key   string
	Value string
}
53
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash is computed by depsetContentHash from the two fields below
	// and serves as the depset's identifier.
	ContentHash string
	// DirectArtifacts holds the full paths of the artifacts directly contained
	// in this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the ContentHash of every child depset.
	TransitiveDepSetHashes []string
}
66
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
type depSetOfFiles struct {
	// Id is the identifier by which actions and other depsets refer to this depset.
	Id depsetId
	// DirectArtifactIds lists the artifacts contained directly in this depset.
	DirectArtifactIds []artifactId
	// TransitiveDepSetIds lists the child depsets of this depset.
	TransitiveDepSetIds []depsetId
}
75
// action contains relevant portions of Bazel's aquery proto, Action.
// Represents a single command line invocation in the Bazel build graph.
type action struct {
	// Arguments is the action's command line; it is shell-escaped and joined
	// with spaces to form the command of a build statement.
	Arguments []string
	// EnvironmentVariables is copied verbatim into BuildStatement.Env.
	EnvironmentVariables []KeyValuePair
	// InputDepSetIds lists the depsets holding the action's inputs.
	InputDepSetIds []depsetId
	// Mnemonic is the kind of action, e.g. "Middleman".
	Mnemonic string
	// OutputIds lists the artifacts produced by the action.
	OutputIds []artifactId
	// TemplateContent and Substitutions are presumably only set for template
	// expand actions -- see expandTemplateContent.
	TemplateContent string
	Substitutions   []KeyValuePair
	// FileContents is copied into BuildStatement.FileContents for file write
	// actions.
	FileContents string
}
88
// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
// An aquery response from Bazel contains a single ActionGraphContainer proto.
// AqueryBuildStatements decodes the JSON form of the response into this type.
type actionGraphContainer struct {
	Artifacts     []artifact
	Actions       []action
	DepSetOfFiles []depSetOfFiles
	PathFragments []pathFragment
}
97
// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
type BuildStatement struct {
	// Command is the command line to run; it is left empty for file write
	// actions, which carry FileContents instead.
	Command string
	// Depfile is the action output whose extension is ".d", or nil if there is none.
	Depfile *string
	// OutputPaths lists the action's outputs, excluding the depfile.
	OutputPaths []string
	// SymlinkPaths is only populated for symlink actions, where it refers to
	// the same paths as OutputPaths.
	SymlinkPaths []string
	// Env holds the environment variables of the originating action.
	Env []KeyValuePair
	// Mnemonic is the mnemonic of the originating action.
	Mnemonic string

	// Inputs of this build statement, either as unexpanded depsets or expanded
	// input paths. There should be no overlap between these fields; an input
	// path should either be included as part of an unexpanded depset or a raw
	// input path string, but not both.
	InputDepsetHashes []string
	InputPaths        []string
	// FileContents is copied from the originating action for file write actions.
	FileContents string
}
116
// A helper type for aquery processing which facilitates retrieval of path IDs from their
// less readable Bazel structures (depset and path fragment).
type aqueryArtifactHandler struct {
	// Switches to true if any depset contains only `bazelToolsDependencySentinel`,
	// i.e. if a depset would otherwise have been left without any content.
	bazelToolsDependencySentinelNeeded bool
	// Maps depset id to AqueryDepset, a representation of depset which is
	// post-processed for middleman artifact handling, unhandled artifact
	// dropping, content hashing, etc.
	depsetIdToAqueryDepset map[depsetId]AqueryDepset
	// Maps content hash to AqueryDepset.
	depsetHashToAqueryDepset map[string]AqueryDepset

	// depsetHashToArtifactPathsCache is a memoization of depset flattening
	// (content hash to all transitive artifact paths), because flattening
	// may be an expensive operation.
	depsetHashToArtifactPathsCache map[string][]string
	// Maps artifact ids to fully expanded paths.
	artifactIdToPath map[artifactId]string
}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500135
// The tokens should be substituted with the value specified here, instead of the
// one returned in 'substitutions' of TemplateExpand action.
// Keys are the template tokens, values are their replacements.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
143
// This pattern matches the MANIFEST file created for a py_binary target, i.e.
// any path ending in "<name>.runfiles/MANIFEST". Input artifacts matching it
// are dropped from depsets.
var manifestFilePattern = regexp.MustCompile(".*/.+\\.runfiles/MANIFEST$")
146
// The file name of py3wrapper.sh, which is used by py_binary targets.
// The leading slash is intentional: the constant is matched as a path suffix,
// so only a file named exactly py3wrapper.sh matches.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000149
// A file to be put into depsets that are otherwise empty. It doubles as the
// output path and mnemonic of the build statement which creates the file.
const bazelToolsDependencySentinel = "BAZEL_TOOLS_DEPENDENCY_SENTINEL"
152
// indexBy builds a lookup table from values, using keyFn to derive the key of
// each element. If several elements yield the same key, the last one wins.
// The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	byKey := make(map[K]V)
	for i := range values {
		element := values[i]
		byKey[keyFn(element)] = element
	}
	return byKey
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400160
Usta Shrestha6298cc52022-05-27 17:40:21 -0400161func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
162 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
163 return pf.Id
164 })
165
166 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500167 for _, artifact := range aqueryResult.Artifacts {
168 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
169 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500170 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500171 }
172 artifactIdToPath[artifact.Id] = artifactPath
173 }
Chris Parsons943f2432021-01-19 11:36:50 -0500174
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400175 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400176 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400177 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400178 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
179 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400180 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500181 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500182 if actionEntry.Mnemonic == "Middleman" {
183 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400184 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500185 }
186 }
187 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400188
Usta Shrestha6298cc52022-05-27 17:40:21 -0400189 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
190 return d.Id
191 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400192
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400193 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400194 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400195 depsetHashToAqueryDepset: map[string]AqueryDepset{},
196 depsetHashToArtifactPathsCache: map[string][]string{},
197 artifactIdToPath: artifactIdToPath,
198 }
199
200 // Validate and adjust aqueryResult.DepSetOfFiles values.
201 for _, depset := range aqueryResult.DepSetOfFiles {
202 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
203 if err != nil {
204 return nil, err
205 }
206 }
207
208 return &aqueryHandler, nil
209}
210
211// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
212// depset.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400213func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400214 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
215 return aqueryDepset, nil
216 }
217 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400218 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400219 for _, artifactId := range depset.DirectArtifactIds {
220 path, pathExists := a.artifactIdToPath[artifactId]
221 if !pathExists {
222 return AqueryDepset{}, fmt.Errorf("undefined input artifactId %d", artifactId)
223 }
224 // Filter out any inputs which are universally dropped, and swap middleman
225 // artifacts with their corresponding depsets.
226 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
227 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
228 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400229 } else if strings.HasSuffix(path, py3wrapperFileName) ||
230 manifestFilePattern.MatchString(path) ||
231 strings.HasPrefix(path, "../bazel_tools") {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400232 // Drop these artifacts.
233 // See go/python-binary-host-mixed-build for more details.
234 // 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
235 // Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
236 // 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
237 // but it doesn't contain sufficient information so no Ninja build statements are generated
238 // for creating it.
239 // So in mixed build mode, when these two are used as input of some Ninja build statement,
240 // since there is no build statement to create them, they should be removed from input paths.
241 // TODO(b/197135294): Clean up this custom runfiles handling logic when
242 // SourceSymlinkManifest and SymlinkTree actions are supported.
Usta Shresthaef922252022-06-02 14:23:02 -0400243 // 3) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
244 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400245 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400246 directArtifactPaths = append(directArtifactPaths, path)
247 }
248 }
249
Usta Shrestha6298cc52022-05-27 17:40:21 -0400250 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400251 for _, childDepsetId := range transitiveDepsetIds {
252 childDepset, exists := depsetIdToDepset[childDepsetId]
253 if !exists {
254 return AqueryDepset{}, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
255 }
256 childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
257 if err != nil {
258 return AqueryDepset{}, err
259 }
260 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
261 }
Usta Shresthaef922252022-06-02 14:23:02 -0400262 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
263 // We could omit this depset altogether but that requires cleanup on
264 // transitive dependents.
265 // As a simpler alternative, we use this sentinel file as a dependency.
266 directArtifactPaths = append(directArtifactPaths, bazelToolsDependencySentinel)
267 a.bazelToolsDependencySentinelNeeded = true
268 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400269 aqueryDepset := AqueryDepset{
270 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
271 DirectArtifacts: directArtifactPaths,
272 TransitiveDepSetHashes: childDepsetHashes,
273 }
274 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
275 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
276 return aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400277}
278
Chris Parsons1a7aca02022-04-25 22:35:15 -0400279// getInputPaths flattens the depsets of the given IDs and returns all transitive
280// input paths contained in these depsets.
281// This is a potentially expensive operation, and should not be invoked except
282// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400283func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
284 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400285
286 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400287 depset := a.depsetIdToAqueryDepset[inputDepSetId]
288 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400289 if err != nil {
290 return nil, err
291 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400292 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400293 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400294 }
295 }
Wei Li455ba832021-11-04 22:58:12 +0000296
Chris Parsons1a7aca02022-04-25 22:35:15 -0400297 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400298}
299
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400300func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
301 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400302 return result, nil
303 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400304 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
305 result := depset.DirectArtifacts
306 for _, childHash := range depset.TransitiveDepSetHashes {
307 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400308 if err != nil {
309 return nil, err
310 }
311 result = append(result, childArtifactIds...)
312 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400313 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400314 return result, nil
315 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400316 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400317 }
318}
319
Chris Parsons1a7aca02022-04-25 22:35:15 -0400320// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400321// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400322// action graph, as described by the given action graph json proto.
323// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
324// are one-to-one with Bazel's depSetOfFiles objects.
325func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400326 var aqueryResult actionGraphContainer
327 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
328 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400329 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400330 }
331 aqueryHandler, err := newAqueryHandler(aqueryResult)
332 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400333 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400334 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500335
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400336 var buildStatements []BuildStatement
Usta Shresthaef922252022-06-02 14:23:02 -0400337 if aqueryHandler.bazelToolsDependencySentinelNeeded {
338 buildStatements = append(buildStatements, BuildStatement{
339 Command: fmt.Sprintf("touch '%s'", bazelToolsDependencySentinel),
340 OutputPaths: []string{bazelToolsDependencySentinel},
341 Mnemonic: bazelToolsDependencySentinel,
342 })
343 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400344
Chris Parsons8d6e4332021-02-22 16:13:50 -0500345 for _, actionEntry := range aqueryResult.Actions {
346 if shouldSkipAction(actionEntry) {
347 continue
348 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400349
Chris Parsons1a7aca02022-04-25 22:35:15 -0400350 var buildStatement BuildStatement
Sasha Smundak1da064c2022-06-08 16:36:16 -0700351 if actionEntry.isSymlinkAction() {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400352 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Sasha Smundak1da064c2022-06-08 16:36:16 -0700353 } else if actionEntry.isTemplateExpandAction() && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400354 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Sasha Smundak1da064c2022-06-08 16:36:16 -0700355 } else if actionEntry.isPythonZipperAction() {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400356 buildStatement, err = aqueryHandler.pythonZipperActionBuildStatement(actionEntry, buildStatements)
Sasha Smundak1da064c2022-06-08 16:36:16 -0700357 } else if actionEntry.isFileWriteAction() {
358 buildStatement, err = aqueryHandler.fileWriteActionBuildStatement(actionEntry)
Liz Kammerc49e6822021-06-08 15:04:11 -0400359 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400360 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
361 } else {
362 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
363 }
364
365 if err != nil {
366 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500367 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500368 buildStatements = append(buildStatements, buildStatement)
369 }
370
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400371 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400372 var depsets []AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400373 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
374 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
375 // Two depsets collide on hash. Ensure that their contents are identical.
376 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
Usta Shrestha16ac1352022-06-22 11:01:55 -0400377 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400378 }
379 } else {
380 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
381 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400382 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400383 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400384
385 // Build Statements and depsets must be sorted by their content hash to
386 // preserve determinism between builds (this will result in consistent ninja file
387 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
388 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
389 sort.Slice(buildStatements, func(i, j int) bool {
390 // For build statements, compare output lists. In Bazel, each output file
391 // may only have one action which generates it, so this will provide
392 // a deterministic ordering.
393 outputs_i := buildStatements[i].OutputPaths
394 outputs_j := buildStatements[j].OutputPaths
395 if len(outputs_i) != len(outputs_j) {
396 return len(outputs_i) < len(outputs_j)
397 }
398 if len(outputs_i) == 0 {
399 // No outputs for these actions, so compare commands.
400 return buildStatements[i].Command < buildStatements[j].Command
401 }
402 // There may be multiple outputs, but the output ordering is deterministic.
403 return outputs_i[0] < outputs_j[0]
404 })
405 sort.Slice(depsets, func(i, j int) bool {
406 return depsets[i].ContentHash < depsets[j].ContentHash
407 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400408 return buildStatements, depsets, nil
409}
410
// depsetContentHash derives the identifier of a depset from its contents: the
// SHA256 checksum of the direct paths and the hashes of the child depsets,
// encoded as unpadded URL-safe base64.
// Content hashes are preferred over integer identifiers for determinism: an
// identifier which depends on the order in which the depsets are created
// would differ between otherwise identical builds.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	// Use newline as delimiter, as paths cannot contain newline.
	joinedPaths := strings.Join(directPaths, "\n")
	// Child hashes are concatenated without any delimiter.
	joinedHashes := strings.Join(transitiveDepsetHashes, "")
	checksum := sha256.Sum256([]byte(joinedPaths + joinedHashes))
	return base64.RawURLEncoding.EncodeToString(checksum[:])
}
424
Usta Shrestha6298cc52022-05-27 17:40:21 -0400425func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
426 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400427 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400428 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400429 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400430 } else {
431 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400432 }
433 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400434 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400435}
436
Usta Shresthac2372492022-05-27 10:45:00 -0400437func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400438 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400439 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400440 if err != nil {
441 return BuildStatement{}, err
442 }
Usta Shresthac2372492022-05-27 10:45:00 -0400443 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400444 if err != nil {
445 return BuildStatement{}, err
446 }
447
448 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400449 Command: command,
450 Depfile: depfile,
451 OutputPaths: outputPaths,
452 InputDepsetHashes: inputDepsetHashes,
453 Env: actionEntry.EnvironmentVariables,
454 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400455 }
456 return buildStatement, nil
457}
458
Usta Shresthac2372492022-05-27 10:45:00 -0400459func (a *aqueryArtifactHandler) pythonZipperActionBuildStatement(actionEntry action, prevBuildStatements []BuildStatement) (BuildStatement, error) {
460 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400461 if err != nil {
462 return BuildStatement{}, err
463 }
Usta Shresthac2372492022-05-27 10:45:00 -0400464 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400465 if err != nil {
466 return BuildStatement{}, err
467 }
468
469 if len(inputPaths) < 1 || len(outputPaths) != 1 {
470 return BuildStatement{}, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
471 }
472 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
473 inputPaths, command = removePy3wrapperScript(inputPaths, command)
Usta Shresthaef922252022-06-02 14:23:02 -0400474 command = addCommandForPyBinaryRunfilesDir(command, outputPaths[0])
Chris Parsons1a7aca02022-04-25 22:35:15 -0400475 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
476 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
477 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
478 //
479 // The following logic relies on that Bazel aquery output returns actions in the order that
480 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
481 // in that order, the following logic might not find the build statement generated for Python binary
482 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
483 // See go/python-binary-host-mixed-build for more details.
484 pythonZipFilePath := outputPaths[0]
485 pyBinaryFound := false
Usta Shrestha6298cc52022-05-27 17:40:21 -0400486 for i := range prevBuildStatements {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400487 if len(prevBuildStatements[i].OutputPaths) == 1 && prevBuildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
488 prevBuildStatements[i].InputPaths = append(prevBuildStatements[i].InputPaths, pythonZipFilePath)
489 pyBinaryFound = true
490 }
491 }
492 if !pyBinaryFound {
493 return BuildStatement{}, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
494 }
495
496 buildStatement := BuildStatement{
497 Command: command,
498 Depfile: depfile,
499 OutputPaths: outputPaths,
500 InputPaths: inputPaths,
501 Env: actionEntry.EnvironmentVariables,
502 Mnemonic: actionEntry.Mnemonic,
503 }
504 return buildStatement, nil
505}
506
Usta Shresthac2372492022-05-27 10:45:00 -0400507func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
508 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400509 if err != nil {
510 return BuildStatement{}, err
511 }
512 if len(outputPaths) != 1 {
513 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
514 }
515 expandedTemplateContent := expandTemplateContent(actionEntry)
516 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
517 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
518 // change \n to space and mess up the format of Python programs.
519 // sed is used to convert \\n back to \n before saving to output file.
520 // See go/python-binary-host-mixed-build for more details.
521 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
522 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400523 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400524 if err != nil {
525 return BuildStatement{}, err
526 }
527
528 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400529 Command: command,
530 Depfile: depfile,
531 OutputPaths: outputPaths,
532 InputDepsetHashes: inputDepsetHashes,
533 Env: actionEntry.EnvironmentVariables,
534 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400535 }
536 return buildStatement, nil
537}
538
Sasha Smundak1da064c2022-06-08 16:36:16 -0700539func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry action) (BuildStatement, error) {
540 outputPaths, _, err := a.getOutputPaths(actionEntry)
541 var depsetHashes []string
542 if err == nil {
543 depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
544 }
545 if err != nil {
546 return BuildStatement{}, err
547 }
548 return BuildStatement{
549 Depfile: nil,
550 OutputPaths: outputPaths,
551 Env: actionEntry.EnvironmentVariables,
552 Mnemonic: actionEntry.Mnemonic,
553 InputDepsetHashes: depsetHashes,
554 FileContents: actionEntry.FileContents,
555 }, nil
556}
557
Usta Shresthac2372492022-05-27 10:45:00 -0400558func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
559 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400560 if err != nil {
561 return BuildStatement{}, err
562 }
563
Usta Shresthac2372492022-05-27 10:45:00 -0400564 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400565 if err != nil {
566 return BuildStatement{}, err
567 }
568 if len(inputPaths) != 1 || len(outputPaths) != 1 {
569 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
570 }
571 out := outputPaths[0]
572 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
573 out = proptools.ShellEscapeIncludingSpaces(out)
574 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
575 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
576 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
577 symlinkPaths := outputPaths[:]
578
579 buildStatement := BuildStatement{
580 Command: command,
581 Depfile: depfile,
582 OutputPaths: outputPaths,
583 InputPaths: inputPaths,
584 Env: actionEntry.EnvironmentVariables,
585 Mnemonic: actionEntry.Mnemonic,
586 SymlinkPaths: symlinkPaths,
587 }
588 return buildStatement, nil
589}
590
Usta Shresthac2372492022-05-27 10:45:00 -0400591func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400592 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400593 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400594 if !exists {
595 err = fmt.Errorf("undefined outputId %d", outputId)
596 return
597 }
598 ext := filepath.Ext(outputPath)
599 if ext == ".d" {
600 if depfile != nil {
601 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
602 return
603 } else {
604 depfile = &outputPath
605 }
606 } else {
607 outputPaths = append(outputPaths, outputPath)
608 }
609 }
610 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500611}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500612
Wei Li455ba832021-11-04 22:58:12 +0000613// expandTemplateContent substitutes the tokens in a template.
614func expandTemplateContent(actionEntry action) string {
615 replacerString := []string{}
616 for _, pair := range actionEntry.Substitutions {
617 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400618 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000619 value = val
620 }
621 replacerString = append(replacerString, pair.Key, value)
622 }
623 replacer := strings.NewReplacer(replacerString...)
624 return replacer.Replace(actionEntry.TemplateContent)
625}
626
// escapeCommandlineArgument escapes str in a single pass using these rewrites:
//
//	\  -> \\
//	$  -> \$
//	`  -> \`
//	"  -> \"
//	newline -> \n (a backslash followed by the letter n)
//	'  -> '"'"'
//
// All other bytes are copied through unchanged.
func escapeCommandlineArgument(str string) string {
	var escaped strings.Builder
	escaped.Grow(len(str))
	// Every escaped character is a single ASCII byte, so a byte-wise scan
	// leaves multi-byte UTF-8 sequences intact.
	for i := 0; i < len(str); i++ {
		switch c := str[i]; c {
		case '\\':
			escaped.WriteString(`\\`)
		case '$':
			escaped.WriteString(`\$`)
		case '`':
			escaped.WriteString("\\`")
		case '"':
			escaped.WriteString(`\"`)
		case '\n':
			escaped.WriteString(`\n`)
		case '\'':
			escaped.WriteString(`'"'"'`)
		default:
			escaped.WriteByte(c)
		}
	}
	return escaped.String()
}
639
640// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
641// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
642// there is no action returned by aquery for creating it. So in mixed build "python3" is used
643// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
644// removed from input paths and command of creating python zip file.
645// See go/python-binary-host-mixed-build for more details.
646// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400647func removePy3wrapperScript(inputPaths []string, command string) (newInputPaths []string, newCommand string) {
Wei Li455ba832021-11-04 22:58:12 +0000648 // Remove from inputs
Usta Shresthaef922252022-06-02 14:23:02 -0400649 var filteredInputPaths []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400650 for _, path := range inputPaths {
Wei Li455ba832021-11-04 22:58:12 +0000651 if !strings.HasSuffix(path, py3wrapperFileName) {
652 filteredInputPaths = append(filteredInputPaths, path)
653 }
654 }
655 newInputPaths = filteredInputPaths
656
657 // Remove from command line
658 var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400659 newCommand = re.ReplaceAllString(command, "")
Wei Li455ba832021-11-04 22:58:12 +0000660 return
661}
662
// addCommandForPyBinaryRunfilesDir adds commands creating python binary runfiles directory.
// The runfiles directory would normally be created from the MANIFEST file, which is the output
// of the SourceSymlinkManifest action in the aquery output of Bazel py_binary targets. Since
// SourceSymlinkManifest doesn't contain sufficient information, the MANIFEST file could not be
// created, which also blocks the creation of the runfiles directory.
// See go/python-binary-host-mixed-build for more details.
// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
//
// The returned command is oldCommand followed by a step that unzips zipFilePath into
// <python_binary>.runfiles and a step that creates a symbolic link inside that directory.
// zipFilePath is expected to look like <python_binary>.zip; if it does not end in ".zip" the
// runfiles directory name is derived from the whole path instead of panicking or truncating it.
func addCommandForPyBinaryRunfilesDir(oldCommand string, zipFilePath string) string {
	const zipperPath = "../bazel_tools/tools/zip/zipper/zipper"

	// Unzip the zip file, zipFilePath looks like <python_binary>.zip
	runfilesDirName := strings.TrimSuffix(zipFilePath, ".zip") + ".runfiles"
	unzipCommand := fmt.Sprintf("%s x %s -d %s", zipperPath, zipFilePath, runfilesDirName)
	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
	// when running the python binary stub script.
	linkCommand := fmt.Sprintf("ln -sf runfiles/__main__ %s", runfilesDirName)
	return oldCommand + " && " + unzipCommand + " && " + linkCommand
}
679
Sasha Smundak1da064c2022-06-08 16:36:16 -0700680func (a action) isSymlinkAction() bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000681 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400682}
683
Sasha Smundak1da064c2022-06-08 16:36:16 -0700684func (a action) isTemplateExpandAction() bool {
Wei Li455ba832021-11-04 22:58:12 +0000685 return a.Mnemonic == "TemplateExpand"
686}
687
Sasha Smundak1da064c2022-06-08 16:36:16 -0700688func (a action) isPythonZipperAction() bool {
Wei Li455ba832021-11-04 22:58:12 +0000689 return a.Mnemonic == "PythonZipper"
690}
691
Sasha Smundak1da064c2022-06-08 16:36:16 -0700692func (a action) isFileWriteAction() bool {
693 return a.Mnemonic == "FileWrite" || a.Mnemonic == "SourceSymlinkManifest"
694}
695
Chris Parsons8d6e4332021-02-22 16:13:50 -0500696func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400697 // TODO(b/180945121): Handle complex symlink actions.
Sasha Smundak1da064c2022-06-08 16:36:16 -0700698 if a.Mnemonic == "SymlinkTree" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500699 return true
700 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400701 // Middleman actions are not handled like other actions; they are handled separately as a
702 // preparatory step so that their inputs may be relayed to actions depending on middleman
703 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500704 if a.Mnemonic == "Middleman" {
705 return true
706 }
707 // Skip "Fail" actions, which are placeholder actions designed to always fail.
708 if a.Mnemonic == "Fail" {
709 return true
710 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700711 if a.Mnemonic == "BaselineCoverage" {
712 return true
713 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500714 return false
715}
716
Usta Shrestha6298cc52022-05-27 17:40:21 -0400717func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
718 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500719 currId := id
720 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
721 for currId > 0 {
722 currFragment, ok := pathFragmentsMap[currId]
723 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500724 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500725 }
726 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400727 if currId == currFragment.ParentId {
728 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
729 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500730 currId = currFragment.ParentId
731 }
732 return filepath.Join(labels...), nil
733}