blob: 6829698c0b9b753c4327d8351c728ea978aa2751 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050020 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050021 "fmt"
22 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "reflect"
Wei Li455ba832021-11-04 22:58:12 +000024 "regexp"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040025 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050026 "strings"
27
28 "github.com/google/blueprint/proptools"
29)
30
// artifactId is the integer ID of an artifact entry in an aquery response.
type artifactId int

// depsetId is the integer ID of a depSetOfFiles entry in an aquery response.
type depsetId int

// pathFragmentId is the integer ID of a pathFragment entry in an aquery response.
type pathFragmentId int
34
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
type artifact struct {
	// Id is the ID by which depsets and actions refer to this artifact.
	Id artifactId
	// PathFragmentId is the ID of the pathFragment from which the artifact's
	// full path is expanded (see newAqueryHandler).
	PathFragmentId pathFragmentId
}
41
// pathFragment is one entry of the PathFragments list of an aquery response.
// NOTE(review): presumably Label is a single path segment and ParentId links
// to the fragment of the enclosing directory; the expansion logic
// (expandPathFragment) is not visible here -- confirm.
type pathFragment struct {
	Id       pathFragmentId
	Label    string
	ParentId pathFragmentId
}
47
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
// It is used both for an action's environment variables and for the token
// substitutions of a template expansion action.
type KeyValuePair struct {
	Key   string
	Value string
}
53
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset; it is computed by depsetContentHash
	// from DirectArtifacts and TransitiveDepSetHashes.
	ContentHash string
	// DirectArtifacts holds the full paths of the artifacts directly in this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the content hashes of the child depsets.
	TransitiveDepSetHashes []string
}
66
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
type depSetOfFiles struct {
	// Id is the ID by which actions and other depsets refer to this depset.
	Id depsetId
	// DirectArtifactIds lists the artifacts contained directly in this depset.
	DirectArtifactIds []artifactId
	// TransitiveDepSetIds lists the child depsets of this depset.
	TransitiveDepSetIds []depsetId
}
75
// action contains relevant portions of Bazel's aquery proto, Action.
// Represents a single command line invocation in the Bazel build graph.
type action struct {
	// Arguments is the command line; it is shell-escaped and joined with
	// spaces to form a build statement's command.
	Arguments            []string
	EnvironmentVariables []KeyValuePair
	// InputDepSetIds lists the depsets holding the inputs of the action.
	InputDepSetIds []depsetId
	Mnemonic       string
	// OutputIds lists the artifacts produced by the action.
	OutputIds []artifactId
	// TemplateContent and Substitutions are consumed by expandTemplateContent
	// for template expansion actions.
	TemplateContent string
	Substitutions   []KeyValuePair
}
87
// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
// An aquery response from Bazel contains a single ActionGraphContainer proto.
// It is the type the aquery JSON is unmarshalled into by AqueryBuildStatements.
type actionGraphContainer struct {
	Artifacts     []artifact
	Actions       []action
	DepSetOfFiles []depSetOfFiles
	PathFragments []pathFragment
}
96
// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
type BuildStatement struct {
	// Command is the shell command line of the build statement.
	Command string
	// Depfile is the action output whose extension is ".d", or nil if the
	// action has no such output (see getOutputPaths).
	Depfile *string
	// OutputPaths holds the paths of all outputs other than the depfile.
	OutputPaths []string
	// SymlinkPaths is populated for symlink actions, where it holds the
	// output paths of the action.
	SymlinkPaths []string
	Env          []KeyValuePair
	Mnemonic     string

	// Inputs of this build statement, either as unexpanded depsets or expanded
	// input paths. There should be no overlap between these fields; an input
	// path should either be included as part of an unexpanded depset or a raw
	// input path string, but not both.
	InputDepsetHashes []string
	InputPaths        []string
}
114
// A helper type for aquery processing which facilitates retrieval of path IDs from their
// less readable Bazel structures (depset and path fragment).
type aqueryArtifactHandler struct {
	// Maps depset id to AqueryDepset, a representation of depset which is
	// post-processed for middleman artifact handling, unhandled artifact
	// dropping, content hashing, etc.
	depsetIdToAqueryDepset map[depsetId]AqueryDepset
	// Maps content hash to AqueryDepset.
	depsetHashToAqueryDepset map[string]AqueryDepset

	// depsetHashToArtifactPathsCache is a memoization of depset flattening, because flattening
	// may be an expensive operation. Keys are depset content hashes; values are the
	// flattened artifact paths (see artifactPathsFromDepsetHash).
	depsetHashToArtifactPathsCache map[string][]string
	// Maps artifact ids to fully expanded paths.
	artifactIdToPath map[artifactId]string
}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500131
// templateActionOverriddenTokens lists the template tokens that should be
// substituted with the value specified here, instead of the one returned in
// 'substitutions' of the TemplateExpand action. It is consulted by
// expandTemplateContent.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
139
// manifestFilePattern matches the path of the MANIFEST file created for a
// py_binary target: a path that ends in "/<name>.runfiles/MANIFEST".
var manifestFilePattern = regexp.MustCompile(`.*/.+\.runfiles/MANIFEST$`)
142
// The file name of py3wrapper.sh, which is used by py_binary targets.
// The value carries a leading "/" because it is matched as a path suffix
// (strings.HasSuffix) and appended to a regular expression that strips the
// script's path from command lines.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000145
// indexBy builds a lookup table over values, keyed by keyFn(value). When
// several values yield the same key, the one that appears last wins.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for i := range values {
		index[keyFn(values[i])] = values[i]
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400153
Usta Shrestha6298cc52022-05-27 17:40:21 -0400154func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
155 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
156 return pf.Id
157 })
158
159 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500160 for _, artifact := range aqueryResult.Artifacts {
161 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
162 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500163 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500164 }
165 artifactIdToPath[artifact.Id] = artifactPath
166 }
Chris Parsons943f2432021-01-19 11:36:50 -0500167
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400168 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400169 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400170 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400171 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
172 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400173 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500174 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500175 if actionEntry.Mnemonic == "Middleman" {
176 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400177 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500178 }
179 }
180 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400181
Usta Shrestha6298cc52022-05-27 17:40:21 -0400182 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
183 return d.Id
184 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400185
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400186 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400187 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400188 depsetHashToAqueryDepset: map[string]AqueryDepset{},
189 depsetHashToArtifactPathsCache: map[string][]string{},
190 artifactIdToPath: artifactIdToPath,
191 }
192
193 // Validate and adjust aqueryResult.DepSetOfFiles values.
194 for _, depset := range aqueryResult.DepSetOfFiles {
195 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
196 if err != nil {
197 return nil, err
198 }
199 }
200
201 return &aqueryHandler, nil
202}
203
204// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
205// depset.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400206func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400207 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
208 return aqueryDepset, nil
209 }
210 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400211 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400212 for _, artifactId := range depset.DirectArtifactIds {
213 path, pathExists := a.artifactIdToPath[artifactId]
214 if !pathExists {
215 return AqueryDepset{}, fmt.Errorf("undefined input artifactId %d", artifactId)
216 }
217 // Filter out any inputs which are universally dropped, and swap middleman
218 // artifacts with their corresponding depsets.
219 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
220 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
221 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
222 } else if strings.HasSuffix(path, py3wrapperFileName) || manifestFilePattern.MatchString(path) {
223 // Drop these artifacts.
224 // See go/python-binary-host-mixed-build for more details.
225 // 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
226 // Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
227 // 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
228 // but it doesn't contain sufficient information so no Ninja build statements are generated
229 // for creating it.
230 // So in mixed build mode, when these two are used as input of some Ninja build statement,
231 // since there is no build statement to create them, they should be removed from input paths.
232 // TODO(b/197135294): Clean up this custom runfiles handling logic when
233 // SourceSymlinkManifest and SymlinkTree actions are supported.
234 } else {
235 // TODO(b/216194240): Filter out bazel tools.
236 directArtifactPaths = append(directArtifactPaths, path)
237 }
238 }
239
Usta Shrestha6298cc52022-05-27 17:40:21 -0400240 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400241 for _, childDepsetId := range transitiveDepsetIds {
242 childDepset, exists := depsetIdToDepset[childDepsetId]
243 if !exists {
244 return AqueryDepset{}, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
245 }
246 childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
247 if err != nil {
248 return AqueryDepset{}, err
249 }
250 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
251 }
252 aqueryDepset := AqueryDepset{
253 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
254 DirectArtifacts: directArtifactPaths,
255 TransitiveDepSetHashes: childDepsetHashes,
256 }
257 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
258 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
259 return aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400260}
261
Chris Parsons1a7aca02022-04-25 22:35:15 -0400262// getInputPaths flattens the depsets of the given IDs and returns all transitive
263// input paths contained in these depsets.
264// This is a potentially expensive operation, and should not be invoked except
265// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400266func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
267 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400268
269 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400270 depset := a.depsetIdToAqueryDepset[inputDepSetId]
271 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400272 if err != nil {
273 return nil, err
274 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400275 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400276 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400277 }
278 }
Wei Li455ba832021-11-04 22:58:12 +0000279
Chris Parsons1a7aca02022-04-25 22:35:15 -0400280 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400281}
282
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400283func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
284 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400285 return result, nil
286 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400287 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
288 result := depset.DirectArtifacts
289 for _, childHash := range depset.TransitiveDepSetHashes {
290 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400291 if err != nil {
292 return nil, err
293 }
294 result = append(result, childArtifactIds...)
295 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400296 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400297 return result, nil
298 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400299 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400300 }
301}
302
Chris Parsons1a7aca02022-04-25 22:35:15 -0400303// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400304// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400305// action graph, as described by the given action graph json proto.
306// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
307// are one-to-one with Bazel's depSetOfFiles objects.
308func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400309 var aqueryResult actionGraphContainer
310 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
311 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400312 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400313 }
314 aqueryHandler, err := newAqueryHandler(aqueryResult)
315 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400316 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400317 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500318
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400319 var buildStatements []BuildStatement
320
Chris Parsons8d6e4332021-02-22 16:13:50 -0500321 for _, actionEntry := range aqueryResult.Actions {
322 if shouldSkipAction(actionEntry) {
323 continue
324 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400325
Chris Parsons1a7aca02022-04-25 22:35:15 -0400326 var buildStatement BuildStatement
Liz Kammerc49e6822021-06-08 15:04:11 -0400327 if isSymlinkAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400328 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000329 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400330 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000331 } else if isPythonZipperAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400332 buildStatement, err = aqueryHandler.pythonZipperActionBuildStatement(actionEntry, buildStatements)
Liz Kammerc49e6822021-06-08 15:04:11 -0400333 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400334 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
335 } else {
336 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
337 }
338
339 if err != nil {
340 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500341 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500342 buildStatements = append(buildStatements, buildStatement)
343 }
344
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400345 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400346 var depsets []AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400347 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
348 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
349 // Two depsets collide on hash. Ensure that their contents are identical.
350 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
Usta Shrestha16ac1352022-06-22 11:01:55 -0400351 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400352 }
353 } else {
354 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
355 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400356 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400357 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400358
359 // Build Statements and depsets must be sorted by their content hash to
360 // preserve determinism between builds (this will result in consistent ninja file
361 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
362 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
363 sort.Slice(buildStatements, func(i, j int) bool {
364 // For build statements, compare output lists. In Bazel, each output file
365 // may only have one action which generates it, so this will provide
366 // a deterministic ordering.
367 outputs_i := buildStatements[i].OutputPaths
368 outputs_j := buildStatements[j].OutputPaths
369 if len(outputs_i) != len(outputs_j) {
370 return len(outputs_i) < len(outputs_j)
371 }
372 if len(outputs_i) == 0 {
373 // No outputs for these actions, so compare commands.
374 return buildStatements[i].Command < buildStatements[j].Command
375 }
376 // There may be multiple outputs, but the output ordering is deterministic.
377 return outputs_i[0] < outputs_j[0]
378 })
379 sort.Slice(depsets, func(i, j int) bool {
380 return depsets[i].ContentHash < depsets[j].ContentHash
381 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400382 return buildStatements, depsets, nil
383}
384
// depsetContentHash returns the identifier of a depset with the given contents:
// the SHA256 checksum of the direct paths and the transitive depset hashes,
// encoded as unpadded URL-safe base64.
// Identifying a depset by its contents keeps the identifier deterministic; an
// integer identifier assigned in creation order would not be.
// The direct paths are joined with newlines (paths cannot contain a newline);
// the transitive hashes are appended after them without any delimiter.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	payload := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(payload))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
398
Usta Shrestha6298cc52022-05-27 17:40:21 -0400399func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
400 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400401 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400402 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400403 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400404 } else {
405 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400406 }
407 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400408 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400409}
410
Usta Shresthac2372492022-05-27 10:45:00 -0400411func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400412 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400413 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400414 if err != nil {
415 return BuildStatement{}, err
416 }
Usta Shresthac2372492022-05-27 10:45:00 -0400417 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400418 if err != nil {
419 return BuildStatement{}, err
420 }
421
422 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400423 Command: command,
424 Depfile: depfile,
425 OutputPaths: outputPaths,
426 InputDepsetHashes: inputDepsetHashes,
427 Env: actionEntry.EnvironmentVariables,
428 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400429 }
430 return buildStatement, nil
431}
432
Usta Shresthac2372492022-05-27 10:45:00 -0400433func (a *aqueryArtifactHandler) pythonZipperActionBuildStatement(actionEntry action, prevBuildStatements []BuildStatement) (BuildStatement, error) {
434 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400435 if err != nil {
436 return BuildStatement{}, err
437 }
Usta Shresthac2372492022-05-27 10:45:00 -0400438 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400439 if err != nil {
440 return BuildStatement{}, err
441 }
442
443 if len(inputPaths) < 1 || len(outputPaths) != 1 {
444 return BuildStatement{}, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
445 }
446 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
447 inputPaths, command = removePy3wrapperScript(inputPaths, command)
448 command = addCommandForPyBinaryRunfilesDir(command, inputPaths[0], outputPaths[0])
449 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
450 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
451 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
452 //
453 // The following logic relies on that Bazel aquery output returns actions in the order that
454 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
455 // in that order, the following logic might not find the build statement generated for Python binary
456 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
457 // See go/python-binary-host-mixed-build for more details.
458 pythonZipFilePath := outputPaths[0]
459 pyBinaryFound := false
Usta Shrestha6298cc52022-05-27 17:40:21 -0400460 for i := range prevBuildStatements {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400461 if len(prevBuildStatements[i].OutputPaths) == 1 && prevBuildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
462 prevBuildStatements[i].InputPaths = append(prevBuildStatements[i].InputPaths, pythonZipFilePath)
463 pyBinaryFound = true
464 }
465 }
466 if !pyBinaryFound {
467 return BuildStatement{}, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
468 }
469
470 buildStatement := BuildStatement{
471 Command: command,
472 Depfile: depfile,
473 OutputPaths: outputPaths,
474 InputPaths: inputPaths,
475 Env: actionEntry.EnvironmentVariables,
476 Mnemonic: actionEntry.Mnemonic,
477 }
478 return buildStatement, nil
479}
480
Usta Shresthac2372492022-05-27 10:45:00 -0400481func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
482 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400483 if err != nil {
484 return BuildStatement{}, err
485 }
486 if len(outputPaths) != 1 {
487 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
488 }
489 expandedTemplateContent := expandTemplateContent(actionEntry)
490 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
491 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
492 // change \n to space and mess up the format of Python programs.
493 // sed is used to convert \\n back to \n before saving to output file.
494 // See go/python-binary-host-mixed-build for more details.
495 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
496 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400497 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400498 if err != nil {
499 return BuildStatement{}, err
500 }
501
502 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400503 Command: command,
504 Depfile: depfile,
505 OutputPaths: outputPaths,
506 InputDepsetHashes: inputDepsetHashes,
507 Env: actionEntry.EnvironmentVariables,
508 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400509 }
510 return buildStatement, nil
511}
512
Usta Shresthac2372492022-05-27 10:45:00 -0400513func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
514 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400515 if err != nil {
516 return BuildStatement{}, err
517 }
518
Usta Shresthac2372492022-05-27 10:45:00 -0400519 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400520 if err != nil {
521 return BuildStatement{}, err
522 }
523 if len(inputPaths) != 1 || len(outputPaths) != 1 {
524 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
525 }
526 out := outputPaths[0]
527 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
528 out = proptools.ShellEscapeIncludingSpaces(out)
529 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
530 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
531 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
532 symlinkPaths := outputPaths[:]
533
534 buildStatement := BuildStatement{
535 Command: command,
536 Depfile: depfile,
537 OutputPaths: outputPaths,
538 InputPaths: inputPaths,
539 Env: actionEntry.EnvironmentVariables,
540 Mnemonic: actionEntry.Mnemonic,
541 SymlinkPaths: symlinkPaths,
542 }
543 return buildStatement, nil
544}
545
Usta Shresthac2372492022-05-27 10:45:00 -0400546func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400547 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400548 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400549 if !exists {
550 err = fmt.Errorf("undefined outputId %d", outputId)
551 return
552 }
553 ext := filepath.Ext(outputPath)
554 if ext == ".d" {
555 if depfile != nil {
556 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
557 return
558 } else {
559 depfile = &outputPath
560 }
561 } else {
562 outputPaths = append(outputPaths, outputPath)
563 }
564 }
565 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500566}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500567
Wei Li455ba832021-11-04 22:58:12 +0000568// expandTemplateContent substitutes the tokens in a template.
569func expandTemplateContent(actionEntry action) string {
570 replacerString := []string{}
571 for _, pair := range actionEntry.Substitutions {
572 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400573 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000574 value = val
575 }
576 replacerString = append(replacerString, pair.Key, value)
577 }
578 replacer := strings.NewReplacer(replacerString...)
579 return replacer.Replace(actionEntry.TemplateContent)
580}
581
// escapeCommandlineArgument returns str with the following characters
// rewritten, leaving every other byte untouched:
//
//	backslash     -> two backslashes
//	dollar sign   -> backslash, dollar sign
//	backtick      -> backslash, backtick
//	double quote  -> backslash, double quote
//	newline       -> backslash followed by the letter n
//	single quote  -> '"'"'
func escapeCommandlineArgument(str string) string {
	var escaped strings.Builder
	// Every character of interest is a single byte, so walk the string
	// byte by byte and copy everything else through unchanged.
	for i := 0; i < len(str); i++ {
		switch c := str[i]; c {
		case '\\':
			escaped.WriteString(`\\`)
		case '$':
			escaped.WriteString(`\$`)
		case '`':
			escaped.WriteString("\\`")
		case '"':
			escaped.WriteString(`\"`)
		case '\n':
			escaped.WriteString("\\n")
		case '\'':
			escaped.WriteString(`'"'"'`)
		default:
			escaped.WriteByte(c)
		}
	}
	return escaped.String()
}
594
595// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
596// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
597// there is no action returned by aquery for creating it. So in mixed build "python3" is used
598// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
599// removed from input paths and command of creating python zip file.
600// See go/python-binary-host-mixed-build for more details.
601// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400602func removePy3wrapperScript(inputPaths []string, command string) (newInputPaths []string, newCommand string) {
Wei Li455ba832021-11-04 22:58:12 +0000603 // Remove from inputs
604 filteredInputPaths := []string{}
Chris Parsons1a7aca02022-04-25 22:35:15 -0400605 for _, path := range inputPaths {
Wei Li455ba832021-11-04 22:58:12 +0000606 if !strings.HasSuffix(path, py3wrapperFileName) {
607 filteredInputPaths = append(filteredInputPaths, path)
608 }
609 }
610 newInputPaths = filteredInputPaths
611
612 // Remove from command line
613 var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400614 newCommand = re.ReplaceAllString(command, "")
Wei Li455ba832021-11-04 22:58:12 +0000615 return
616}
617
// addCommandForPyBinaryRunfilesDir appends to oldCommand the commands that
// create the runfiles directory of a python binary.
//
// The runfiles directory would normally be created from the MANIFEST file,
// which is the output of the SourceSymlinkManifest action found in the aquery
// output of Bazel py_binary targets. SourceSymlinkManifest does not carry
// enough information to create the MANIFEST file, which in turn blocks
// creating the runfiles directory from it. As a substitute, the zip file is
// extracted with the zipper tool into <python_binary>.runfiles.
//
// zipFilePath is expected to look like <python_binary>.zip; the runfiles
// directory name is zipFilePath with a trailing ".zip" (if present) replaced by
// ".runfiles". The returned string is oldCommand joined with the new commands
// using " && ".
//
// See go/python-binary-host-mixed-build for more details.
// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
func addCommandForPyBinaryRunfilesDir(oldCommand string, zipperCommandPath, zipFilePath string) string {
	// TrimSuffix cannot panic on short paths and leaves paths without a
	// ".zip" suffix intact instead of chopping four arbitrary bytes.
	runfilesDirName := strings.TrimSuffix(zipFilePath, ".zip") + ".runfiles"
	// Unzip the zip file into the runfiles directory.
	command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName)
	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
	// when running the python binary stub script.
	command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
	return oldCommand + " && " + command
}
634
Liz Kammerc49e6822021-06-08 15:04:11 -0400635func isSymlinkAction(a action) bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000636 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400637}
638
Wei Li455ba832021-11-04 22:58:12 +0000639func isTemplateExpandAction(a action) bool {
640 return a.Mnemonic == "TemplateExpand"
641}
642
643func isPythonZipperAction(a action) bool {
644 return a.Mnemonic == "PythonZipper"
645}
646
Chris Parsons8d6e4332021-02-22 16:13:50 -0500647func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400648 // TODO(b/180945121): Handle complex symlink actions.
649 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500650 return true
651 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400652 // Middleman actions are not handled like other actions; they are handled separately as a
653 // preparatory step so that their inputs may be relayed to actions depending on middleman
654 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500655 if a.Mnemonic == "Middleman" {
656 return true
657 }
658 // Skip "Fail" actions, which are placeholder actions designed to always fail.
659 if a.Mnemonic == "Fail" {
660 return true
661 }
662 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
663 // about the contents that are written.
664 if a.Mnemonic == "FileWrite" {
665 return true
666 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700667 if a.Mnemonic == "BaselineCoverage" {
668 return true
669 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500670 return false
671}
672
Usta Shrestha6298cc52022-05-27 17:40:21 -0400673func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
674 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500675 currId := id
676 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
677 for currId > 0 {
678 currFragment, ok := pathFragmentsMap[currId]
679 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500680 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500681 }
682 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400683 if currId == currFragment.ParentId {
684 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
685 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500686 currId = currFragment.ParentId
687 }
688 return filepath.Join(labels...), nil
689}