blob: 030951eb582f41d9556765f88250982359b5d746 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050019 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Wei Li455ba832021-11-04 22:58:12 +000023 "regexp"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040024 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050025 "strings"
26
27 "github.com/google/blueprint/proptools"
28)
29
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
type artifact struct {
	// Id identifies the artifact; depsets and action outputs refer to
	// artifacts by this value.
	Id int
	// PathFragmentId is the Id of the pathFragment from which the artifact's
	// full path is expanded.
	PathFragmentId int
}
36
// pathFragment contains relevant portions of Bazel's aquery proto, PathFragment.
// Fragments are indexed by Id and handed to expandPathFragment to produce an
// artifact's full path.
type pathFragment struct {
	Id    int
	Label string
	// NOTE(review): presumably the Id of the enclosing fragment; confirm
	// against expandPathFragment.
	ParentId int
}
42
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
// It is used both for action environment variables and for template
// substitutions.
type KeyValuePair struct {
	Key   string
	Value string
}
48
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset; it is computed by depsetContentHash
	// from DirectArtifacts and TransitiveDepSetHashes.
	ContentHash string
	// DirectArtifacts holds the full paths of the artifacts directly contained
	// in this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the ContentHash of each child depset.
	TransitiveDepSetHashes []string
}
61
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
type depSetOfFiles struct {
	Id int
	// DirectArtifactIds are the Ids of the artifacts directly in this depset.
	DirectArtifactIds []int
	// TransitiveDepSetIds are the Ids of the child depsets.
	TransitiveDepSetIds []int
}
70
71// action contains relevant portions of Bazel's aquery proto, Action.
72// Represents a single command line invocation in the Bazel build graph.
73type action struct {
74 Arguments []string
75 EnvironmentVariables []KeyValuePair
Chris Parsonsaffbb602020-12-23 12:02:11 -050076 InputDepSetIds []int
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050077 Mnemonic string
Chris Parsonsaffbb602020-12-23 12:02:11 -050078 OutputIds []int
Wei Li455ba832021-11-04 22:58:12 +000079 TemplateContent string
80 Substitutions []KeyValuePair
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050081}
82
83// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
84// An aquery response from Bazel contains a single ActionGraphContainer proto.
85type actionGraphContainer struct {
86 Artifacts []artifact
87 Actions []action
88 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050089 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050090}
91
92// BuildStatement contains information to register a build statement corresponding (one to one)
93// with a Bazel action from Bazel's action graph.
94type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -040095 Command string
96 Depfile *string
97 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -040098 SymlinkPaths []string
99 Env []KeyValuePair
100 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400101
102 // Inputs of this build statement, either as unexpanded depsets or expanded
103 // input paths. There should be no overlap between these fields; an input
104 // path should either be included as part of an unexpanded depset or a raw
105 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400106 InputDepsetHashes []string
107 InputPaths []string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500108}
109
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400110// A helper type for aquery processing which facilitates retrieval of path IDs from their
111// less readable Bazel structures (depset and path fragment).
112type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400113 // Maps depset id to AqueryDepset, a representation of depset which is
114 // post-processed for middleman artifact handling, unhandled artifact
115 // dropping, content hashing, etc.
116 depsetIdToAqueryDepset map[int]AqueryDepset
117 // Maps content hash to AqueryDepset.
118 depsetHashToAqueryDepset map[string]AqueryDepset
119
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400120 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
121 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400122 depsetHashToArtifactPathsCache map[string][]string
123 // Maps artifact ContentHash to fully expanded path.
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400124 artifactIdToPath map[int]string
125}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500126
// TemplateActionOverriddenTokens lists template tokens which should be
// substituted with the value specified here, instead of the one returned in
// 'substitutions' of a TemplateExpand action.
var TemplateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
134
// This pattern matches the MANIFEST file created for a py_binary target,
// i.e. any path ending in "/<name>.runfiles/MANIFEST".
var manifestFilePattern = regexp.MustCompile(`.*/.+\.runfiles/MANIFEST$`)
137
// The file name of py3wrapper.sh, which is used by py_binary targets. The
// leading slash is included so that suffix matches only hit a whole path
// component.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000140
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400141func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
Chris Parsonsaffbb602020-12-23 12:02:11 -0500142 pathFragments := map[int]pathFragment{}
143 for _, pathFragment := range aqueryResult.PathFragments {
144 pathFragments[pathFragment.Id] = pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500145 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400146
Chris Parsonsaffbb602020-12-23 12:02:11 -0500147 artifactIdToPath := map[int]string{}
148 for _, artifact := range aqueryResult.Artifacts {
149 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
150 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500151 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500152 }
153 artifactIdToPath[artifact.Id] = artifactPath
154 }
Chris Parsons943f2432021-01-19 11:36:50 -0500155
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400156 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400157 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
158 // if we find a middleman action which has outputs [foo, bar], and output [baz_middleman], then,
159 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
160 // that action instead.
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400161 middlemanIdToDepsetIds := map[int][]int{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500162 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500163 if actionEntry.Mnemonic == "Middleman" {
164 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400165 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500166 }
167 }
168 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400169
170 // Store all depset IDs to validate all depset links are resolvable.
171 depsetIds := map[int]bool{}
172 for _, depset := range aqueryResult.DepSetOfFiles {
173 depsetIds[depset.Id] = true
174 }
175
176 depsetIdToDepset := map[int]depSetOfFiles{}
Chris Parsons1a7aca02022-04-25 22:35:15 -0400177 for _, depset := range aqueryResult.DepSetOfFiles {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400178 depsetIdToDepset[depset.Id] = depset
179 }
180
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400181 aqueryHandler := aqueryArtifactHandler{
182 depsetIdToAqueryDepset: map[int]AqueryDepset{},
183 depsetHashToAqueryDepset: map[string]AqueryDepset{},
184 depsetHashToArtifactPathsCache: map[string][]string{},
185 artifactIdToPath: artifactIdToPath,
186 }
187
188 // Validate and adjust aqueryResult.DepSetOfFiles values.
189 for _, depset := range aqueryResult.DepSetOfFiles {
190 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
191 if err != nil {
192 return nil, err
193 }
194 }
195
196 return &aqueryHandler, nil
197}
198
199// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
200// depset.
201func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[int][]int, depsetIdToDepset map[int]depSetOfFiles) (AqueryDepset, error) {
202 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
203 return aqueryDepset, nil
204 }
205 transitiveDepsetIds := depset.TransitiveDepSetIds
206 directArtifactPaths := []string{}
207 for _, artifactId := range depset.DirectArtifactIds {
208 path, pathExists := a.artifactIdToPath[artifactId]
209 if !pathExists {
210 return AqueryDepset{}, fmt.Errorf("undefined input artifactId %d", artifactId)
211 }
212 // Filter out any inputs which are universally dropped, and swap middleman
213 // artifacts with their corresponding depsets.
214 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
215 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
216 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
217 } else if strings.HasSuffix(path, py3wrapperFileName) || manifestFilePattern.MatchString(path) {
218 // Drop these artifacts.
219 // See go/python-binary-host-mixed-build for more details.
220 // 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
221 // Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
222 // 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
223 // but it doesn't contain sufficient information so no Ninja build statements are generated
224 // for creating it.
225 // So in mixed build mode, when these two are used as input of some Ninja build statement,
226 // since there is no build statement to create them, they should be removed from input paths.
227 // TODO(b/197135294): Clean up this custom runfiles handling logic when
228 // SourceSymlinkManifest and SymlinkTree actions are supported.
229 } else {
230 // TODO(b/216194240): Filter out bazel tools.
231 directArtifactPaths = append(directArtifactPaths, path)
232 }
233 }
234
235 childDepsetHashes := []string{}
236 for _, childDepsetId := range transitiveDepsetIds {
237 childDepset, exists := depsetIdToDepset[childDepsetId]
238 if !exists {
239 return AqueryDepset{}, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
240 }
241 childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
242 if err != nil {
243 return AqueryDepset{}, err
244 }
245 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
246 }
247 aqueryDepset := AqueryDepset{
248 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
249 DirectArtifacts: directArtifactPaths,
250 TransitiveDepSetHashes: childDepsetHashes,
251 }
252 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
253 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
254 return aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400255}
256
Chris Parsons1a7aca02022-04-25 22:35:15 -0400257// getInputPaths flattens the depsets of the given IDs and returns all transitive
258// input paths contained in these depsets.
259// This is a potentially expensive operation, and should not be invoked except
260// for actions which need specialized input handling.
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400261func (a *aqueryArtifactHandler) getInputPaths(depsetIds []int) ([]string, error) {
262 inputPaths := []string{}
263
264 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400265 depset := a.depsetIdToAqueryDepset[inputDepSetId]
266 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400267 if err != nil {
268 return nil, err
269 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400270 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400271 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400272 }
273 }
Wei Li455ba832021-11-04 22:58:12 +0000274
Chris Parsons1a7aca02022-04-25 22:35:15 -0400275 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400276}
277
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400278func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
279 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400280 return result, nil
281 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400282 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
283 result := depset.DirectArtifacts
284 for _, childHash := range depset.TransitiveDepSetHashes {
285 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400286 if err != nil {
287 return nil, err
288 }
289 result = append(result, childArtifactIds...)
290 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400291 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400292 return result, nil
293 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400294 return nil, fmt.Errorf("undefined input depset hash %d", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400295 }
296}
297
Chris Parsons1a7aca02022-04-25 22:35:15 -0400298// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
299// which should be registered (and output to a ninja file) to correspond with Bazel's
300// action graph, as described by the given action graph json proto.
301// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
302// are one-to-one with Bazel's depSetOfFiles objects.
303func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400304 var aqueryResult actionGraphContainer
305 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
306 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400307 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400308 }
309 aqueryHandler, err := newAqueryHandler(aqueryResult)
310 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400311 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400312 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500313
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400314 var buildStatements []BuildStatement
315
Chris Parsons8d6e4332021-02-22 16:13:50 -0500316 for _, actionEntry := range aqueryResult.Actions {
317 if shouldSkipAction(actionEntry) {
318 continue
319 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400320
Chris Parsons1a7aca02022-04-25 22:35:15 -0400321 var buildStatement BuildStatement
Liz Kammerc49e6822021-06-08 15:04:11 -0400322 if isSymlinkAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400323 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000324 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400325 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000326 } else if isPythonZipperAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400327 buildStatement, err = aqueryHandler.pythonZipperActionBuildStatement(actionEntry, buildStatements)
Liz Kammerc49e6822021-06-08 15:04:11 -0400328 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400329 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
330 } else {
331 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
332 }
333
334 if err != nil {
335 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500336 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500337 buildStatements = append(buildStatements, buildStatement)
338 }
339
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400340 depsetsByHash := map[string]AqueryDepset{}
341 depsets := []AqueryDepset{}
342 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
343 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
344 // Two depsets collide on hash. Ensure that their contents are identical.
345 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
346 return nil, nil, fmt.Errorf("Two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
347 }
348 } else {
349 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
350 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400351 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400352 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400353
354 // Build Statements and depsets must be sorted by their content hash to
355 // preserve determinism between builds (this will result in consistent ninja file
356 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
357 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
358 sort.Slice(buildStatements, func(i, j int) bool {
359 // For build statements, compare output lists. In Bazel, each output file
360 // may only have one action which generates it, so this will provide
361 // a deterministic ordering.
362 outputs_i := buildStatements[i].OutputPaths
363 outputs_j := buildStatements[j].OutputPaths
364 if len(outputs_i) != len(outputs_j) {
365 return len(outputs_i) < len(outputs_j)
366 }
367 if len(outputs_i) == 0 {
368 // No outputs for these actions, so compare commands.
369 return buildStatements[i].Command < buildStatements[j].Command
370 }
371 // There may be multiple outputs, but the output ordering is deterministic.
372 return outputs_i[0] < outputs_j[0]
373 })
374 sort.Slice(depsets, func(i, j int) bool {
375 return depsets[i].ContentHash < depsets[j].ContentHash
376 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400377 return buildStatements, depsets, nil
378}
379
// depsetContentHash computes and returns a SHA256 checksum of the contents of
// the given depset, as a lowercase hex string. This content hash may serve as
// the depset's identifier.
// Using a content hash for an identifier is superior for determinism. (For example,
// using an integer identifier which depends on the order in which the depsets are
// created would result in nondeterministic depset IDs.)
//
// NOTE(review): the direct paths and the transitive hashes are each joined
// with newlines, but no delimiter separates the two groups, so e.g.
// (["ab"], []) and (["a"], ["b"]) hash identically. AqueryBuildStatements
// reports such a collision as an error; adding a separator here would change
// every emitted hash, so it is left as is.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	h := sha256.New()
	// Use newline as delimiter, as paths cannot contain newline.
	h.Write([]byte(strings.Join(directPaths, "\n")))
	h.Write([]byte(strings.Join(transitiveDepsetHashes, "\n")))
	return fmt.Sprintf("%x", h.Sum(nil))
}
393
Usta Shresthac2372492022-05-27 10:45:00 -0400394func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []int) ([]string, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400395 hashes := []string{}
Chris Parsons1a7aca02022-04-25 22:35:15 -0400396 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400397 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400398 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400399 } else {
400 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400401 }
402 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400403 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400404}
405
Usta Shresthac2372492022-05-27 10:45:00 -0400406func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400407 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400408 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400409 if err != nil {
410 return BuildStatement{}, err
411 }
Usta Shresthac2372492022-05-27 10:45:00 -0400412 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400413 if err != nil {
414 return BuildStatement{}, err
415 }
416
417 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400418 Command: command,
419 Depfile: depfile,
420 OutputPaths: outputPaths,
421 InputDepsetHashes: inputDepsetHashes,
422 Env: actionEntry.EnvironmentVariables,
423 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400424 }
425 return buildStatement, nil
426}
427
Usta Shresthac2372492022-05-27 10:45:00 -0400428func (a *aqueryArtifactHandler) pythonZipperActionBuildStatement(actionEntry action, prevBuildStatements []BuildStatement) (BuildStatement, error) {
429 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400430 if err != nil {
431 return BuildStatement{}, err
432 }
Usta Shresthac2372492022-05-27 10:45:00 -0400433 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400434 if err != nil {
435 return BuildStatement{}, err
436 }
437
438 if len(inputPaths) < 1 || len(outputPaths) != 1 {
439 return BuildStatement{}, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
440 }
441 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
442 inputPaths, command = removePy3wrapperScript(inputPaths, command)
443 command = addCommandForPyBinaryRunfilesDir(command, inputPaths[0], outputPaths[0])
444 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
445 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
446 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
447 //
448 // The following logic relies on that Bazel aquery output returns actions in the order that
449 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
450 // in that order, the following logic might not find the build statement generated for Python binary
451 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
452 // See go/python-binary-host-mixed-build for more details.
453 pythonZipFilePath := outputPaths[0]
454 pyBinaryFound := false
455 for i, _ := range prevBuildStatements {
456 if len(prevBuildStatements[i].OutputPaths) == 1 && prevBuildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
457 prevBuildStatements[i].InputPaths = append(prevBuildStatements[i].InputPaths, pythonZipFilePath)
458 pyBinaryFound = true
459 }
460 }
461 if !pyBinaryFound {
462 return BuildStatement{}, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
463 }
464
465 buildStatement := BuildStatement{
466 Command: command,
467 Depfile: depfile,
468 OutputPaths: outputPaths,
469 InputPaths: inputPaths,
470 Env: actionEntry.EnvironmentVariables,
471 Mnemonic: actionEntry.Mnemonic,
472 }
473 return buildStatement, nil
474}
475
Usta Shresthac2372492022-05-27 10:45:00 -0400476func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
477 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400478 if err != nil {
479 return BuildStatement{}, err
480 }
481 if len(outputPaths) != 1 {
482 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
483 }
484 expandedTemplateContent := expandTemplateContent(actionEntry)
485 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
486 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
487 // change \n to space and mess up the format of Python programs.
488 // sed is used to convert \\n back to \n before saving to output file.
489 // See go/python-binary-host-mixed-build for more details.
490 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
491 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400492 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400493 if err != nil {
494 return BuildStatement{}, err
495 }
496
497 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400498 Command: command,
499 Depfile: depfile,
500 OutputPaths: outputPaths,
501 InputDepsetHashes: inputDepsetHashes,
502 Env: actionEntry.EnvironmentVariables,
503 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400504 }
505 return buildStatement, nil
506}
507
Usta Shresthac2372492022-05-27 10:45:00 -0400508func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
509 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400510 if err != nil {
511 return BuildStatement{}, err
512 }
513
Usta Shresthac2372492022-05-27 10:45:00 -0400514 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400515 if err != nil {
516 return BuildStatement{}, err
517 }
518 if len(inputPaths) != 1 || len(outputPaths) != 1 {
519 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
520 }
521 out := outputPaths[0]
522 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
523 out = proptools.ShellEscapeIncludingSpaces(out)
524 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
525 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
526 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
527 symlinkPaths := outputPaths[:]
528
529 buildStatement := BuildStatement{
530 Command: command,
531 Depfile: depfile,
532 OutputPaths: outputPaths,
533 InputPaths: inputPaths,
534 Env: actionEntry.EnvironmentVariables,
535 Mnemonic: actionEntry.Mnemonic,
536 SymlinkPaths: symlinkPaths,
537 }
538 return buildStatement, nil
539}
540
Usta Shresthac2372492022-05-27 10:45:00 -0400541func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400542 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400543 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400544 if !exists {
545 err = fmt.Errorf("undefined outputId %d", outputId)
546 return
547 }
548 ext := filepath.Ext(outputPath)
549 if ext == ".d" {
550 if depfile != nil {
551 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
552 return
553 } else {
554 depfile = &outputPath
555 }
556 } else {
557 outputPaths = append(outputPaths, outputPath)
558 }
559 }
560 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500561}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500562
Wei Li455ba832021-11-04 22:58:12 +0000563// expandTemplateContent substitutes the tokens in a template.
564func expandTemplateContent(actionEntry action) string {
565 replacerString := []string{}
566 for _, pair := range actionEntry.Substitutions {
567 value := pair.Value
568 if val, ok := TemplateActionOverriddenTokens[pair.Key]; ok {
569 value = val
570 }
571 replacerString = append(replacerString, pair.Key, value)
572 }
573 replacer := strings.NewReplacer(replacerString...)
574 return replacer.Replace(actionEntry.TemplateContent)
575}
576
// commandlineArgumentEscaper performs the replacements documented on
// escapeCommandlineArgument. It is built once; a strings.Replacer is safe for
// concurrent use.
var commandlineArgumentEscaper = strings.NewReplacer(
	`\`, `\\`,
	`$`, `\$`,
	"`", "\\`",
	`"`, `\"`,
	"\n", "\\n",
	`'`, `'"'"'`,
)

// escapeCommandlineArgument escapes str for use inside a double-quoted string
// which is itself embedded in a single-quoted shell argument.
// The replacements are: \->\\, $->\$, `->\`, "->\", newline->\n (a literal
// backslash followed by 'n'), '->'"'"'
func escapeCommandlineArgument(str string) string {
	return commandlineArgumentEscaper.Replace(str)
}
589
590// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
591// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
592// there is no action returned by aquery for creating it. So in mixed build "python3" is used
593// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
594// removed from input paths and command of creating python zip file.
595// See go/python-binary-host-mixed-build for more details.
596// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400597func removePy3wrapperScript(inputPaths []string, command string) (newInputPaths []string, newCommand string) {
Wei Li455ba832021-11-04 22:58:12 +0000598 // Remove from inputs
599 filteredInputPaths := []string{}
Chris Parsons1a7aca02022-04-25 22:35:15 -0400600 for _, path := range inputPaths {
Wei Li455ba832021-11-04 22:58:12 +0000601 if !strings.HasSuffix(path, py3wrapperFileName) {
602 filteredInputPaths = append(filteredInputPaths, path)
603 }
604 }
605 newInputPaths = filteredInputPaths
606
607 // Remove from command line
608 var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400609 newCommand = re.ReplaceAllString(command, "")
Wei Li455ba832021-11-04 22:58:12 +0000610 return
611}
612
// addCommandForPyBinaryRunfilesDir appends, to oldCommand, the commands that create the python
// binary runfiles directory.
//
// The runfiles directory would normally be created from the MANIFEST file, which is the output
// of the SourceSymlinkManifest action in the aquery output of Bazel py_binary targets. That
// action does not contain sufficient information to create the MANIFEST file, which in turn
// blocks creating the runfiles directory from it. Instead, the zip file is extracted with the
// zipper tool into <python_binary>.runfiles.
//
// zipperCommandPath is the path of the zipper executable and zipFilePath is expected to look
// like <python_binary>.zip. The ".zip" suffix is stripped to derive the runfiles directory name;
// a path without that suffix is used as-is (and never causes an out-of-range slice).
//
// See go/python-binary-host-mixed-build for more details.
// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
func addCommandForPyBinaryRunfilesDir(oldCommand string, zipperCommandPath, zipFilePath string) string {
	// Unzip the zip file into <python_binary>.runfiles.
	runfilesDirName := strings.TrimSuffix(zipFilePath, ".zip") + ".runfiles"
	command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName)
	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
	// when running the python binary stub script.
	command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
	return oldCommand + " && " + command
}
629
Liz Kammerc49e6822021-06-08 15:04:11 -0400630func isSymlinkAction(a action) bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000631 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400632}
633
Wei Li455ba832021-11-04 22:58:12 +0000634func isTemplateExpandAction(a action) bool {
635 return a.Mnemonic == "TemplateExpand"
636}
637
638func isPythonZipperAction(a action) bool {
639 return a.Mnemonic == "PythonZipper"
640}
641
Chris Parsons8d6e4332021-02-22 16:13:50 -0500642func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400643 // TODO(b/180945121): Handle complex symlink actions.
644 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500645 return true
646 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400647 // Middleman actions are not handled like other actions; they are handled separately as a
648 // preparatory step so that their inputs may be relayed to actions depending on middleman
649 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500650 if a.Mnemonic == "Middleman" {
651 return true
652 }
653 // Skip "Fail" actions, which are placeholder actions designed to always fail.
654 if a.Mnemonic == "Fail" {
655 return true
656 }
657 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
658 // about the contents that are written.
659 if a.Mnemonic == "FileWrite" {
660 return true
661 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700662 if a.Mnemonic == "BaselineCoverage" {
663 return true
664 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500665 return false
666}
667
Chris Parsonsaffbb602020-12-23 12:02:11 -0500668func expandPathFragment(id int, pathFragmentsMap map[int]pathFragment) (string, error) {
669 labels := []string{}
670 currId := id
671 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
672 for currId > 0 {
673 currFragment, ok := pathFragmentsMap[currId]
674 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500675 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500676 }
677 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400678 if currId == currFragment.ParentId {
679 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
680 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500681 currId = currFragment.ParentId
682 }
683 return filepath.Join(labels...), nil
684}