blob: ab31581bf839523f5cb41713c77d74a12e9f151a [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050019 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Wei Li455ba832021-11-04 22:58:12 +000023 "regexp"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040024 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050025 "strings"
26
27 "github.com/google/blueprint/proptools"
28)
29
// Distinct integer id types for the three kinds of identifiers that appear in
// an aquery response. Keeping them as separate named types prevents an
// artifact id from being used where a depset or path fragment id is expected.
type (
	artifactId     int
	depsetId       int
	pathFragmentId int
)
33
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050034// artifact contains relevant portions of Bazel's aquery proto, Artifact.
35// Represents a single artifact, whether it's a source file or a derived output file.
36type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040037 Id artifactId
38 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050039}
40
41type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040042 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050043 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040044 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050045}
46
// KeyValuePair mirrors the KeyValuePair message of Bazel's aquery proto. In
// this file it carries action environment variables and template
// substitutions.
type KeyValuePair struct {
	Key   string
	Value string
}
52
// AqueryDepset is the post-processed form of a depset from Bazel's aquery
// response. A depset is a data structure for efficient transitive handling of
// artifact paths: it holds artifact paths directly and refers to "child"
// depsets for the rest.
//
// It differs from the `depSetOfFiles` entry of the response proto in two ways:
//   - direct artifacts are listed by full path rather than by ID
//   - the depset is identified by a hash of its contents rather than by an
//     integer ID (for determinism)
type AqueryDepset struct {
	ContentHash            string
	DirectArtifacts        []string
	TransitiveDepSetHashes []string
}
65
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050066// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
67// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
68// data structure for storing large numbers of file paths.
69type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040070 Id depsetId
71 DirectArtifactIds []artifactId
72 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050073}
74
75// action contains relevant portions of Bazel's aquery proto, Action.
76// Represents a single command line invocation in the Bazel build graph.
77type action struct {
78 Arguments []string
79 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040080 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050081 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040082 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000083 TemplateContent string
84 Substitutions []KeyValuePair
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050085}
86
87// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
88// An aquery response from Bazel contains a single ActionGraphContainer proto.
89type actionGraphContainer struct {
90 Artifacts []artifact
91 Actions []action
92 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050093 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050094}
95
96// BuildStatement contains information to register a build statement corresponding (one to one)
97// with a Bazel action from Bazel's action graph.
98type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -040099 Command string
100 Depfile *string
101 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400102 SymlinkPaths []string
103 Env []KeyValuePair
104 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400105
106 // Inputs of this build statement, either as unexpanded depsets or expanded
107 // input paths. There should be no overlap between these fields; an input
108 // path should either be included as part of an unexpanded depset or a raw
109 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400110 InputDepsetHashes []string
111 InputPaths []string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500112}
113
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400114// A helper type for aquery processing which facilitates retrieval of path IDs from their
115// less readable Bazel structures (depset and path fragment).
116type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400117 // Maps depset id to AqueryDepset, a representation of depset which is
118 // post-processed for middleman artifact handling, unhandled artifact
119 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400120 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400121 // Maps content hash to AqueryDepset.
122 depsetHashToAqueryDepset map[string]AqueryDepset
123
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400124 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
125 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400126 depsetHashToArtifactPathsCache map[string][]string
Usta Shrestha6298cc52022-05-27 17:40:21 -0400127 // Maps artifact ids to fully expanded paths.
128 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400129}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500130
var (
	// templateActionOverriddenTokens lists template tokens whose replacement
	// is fixed here, taking precedence over the value returned in the
	// 'substitutions' of a TemplateExpand action.
	templateActionOverriddenTokens = map[string]string{
		// %python_binary% becomes "python3" rather than the value returned by
		// aquery, which is "py3wrapper.sh". See removePy3wrapperScript.
		"%python_binary%": "python3",
	}

	// manifestFilePattern matches the MANIFEST file created for a py_binary
	// target.
	manifestFilePattern = regexp.MustCompile(".*/.+\\.runfiles/MANIFEST$")
)

// py3wrapperFileName is the file name of py3wrapper.sh, including the leading
// path separator. py3wrapper.sh is used by py_binary targets.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000144
// indexBy returns a map from keyFn(v) to v for every v in values. When several
// values produce the same key, the one that appears last in values wins. The
// returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for i := range values {
		index[keyFn(values[i])] = values[i]
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400152
Usta Shrestha6298cc52022-05-27 17:40:21 -0400153func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
154 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
155 return pf.Id
156 })
157
158 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500159 for _, artifact := range aqueryResult.Artifacts {
160 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
161 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500162 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500163 }
164 artifactIdToPath[artifact.Id] = artifactPath
165 }
Chris Parsons943f2432021-01-19 11:36:50 -0500166
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400167 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400168 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
169 // if we find a middleman action which has outputs [foo, bar], and output [baz_middleman], then,
170 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
171 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400172 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500173 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500174 if actionEntry.Mnemonic == "Middleman" {
175 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400176 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500177 }
178 }
179 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400180
Usta Shrestha6298cc52022-05-27 17:40:21 -0400181 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
182 return d.Id
183 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400184
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400185 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400186 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400187 depsetHashToAqueryDepset: map[string]AqueryDepset{},
188 depsetHashToArtifactPathsCache: map[string][]string{},
189 artifactIdToPath: artifactIdToPath,
190 }
191
192 // Validate and adjust aqueryResult.DepSetOfFiles values.
193 for _, depset := range aqueryResult.DepSetOfFiles {
194 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
195 if err != nil {
196 return nil, err
197 }
198 }
199
200 return &aqueryHandler, nil
201}
202
203// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
204// depset.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400205func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400206 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
207 return aqueryDepset, nil
208 }
209 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400210 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400211 for _, artifactId := range depset.DirectArtifactIds {
212 path, pathExists := a.artifactIdToPath[artifactId]
213 if !pathExists {
214 return AqueryDepset{}, fmt.Errorf("undefined input artifactId %d", artifactId)
215 }
216 // Filter out any inputs which are universally dropped, and swap middleman
217 // artifacts with their corresponding depsets.
218 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
219 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
220 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
221 } else if strings.HasSuffix(path, py3wrapperFileName) || manifestFilePattern.MatchString(path) {
222 // Drop these artifacts.
223 // See go/python-binary-host-mixed-build for more details.
224 // 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
225 // Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
226 // 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
227 // but it doesn't contain sufficient information so no Ninja build statements are generated
228 // for creating it.
229 // So in mixed build mode, when these two are used as input of some Ninja build statement,
230 // since there is no build statement to create them, they should be removed from input paths.
231 // TODO(b/197135294): Clean up this custom runfiles handling logic when
232 // SourceSymlinkManifest and SymlinkTree actions are supported.
233 } else {
234 // TODO(b/216194240): Filter out bazel tools.
235 directArtifactPaths = append(directArtifactPaths, path)
236 }
237 }
238
Usta Shrestha6298cc52022-05-27 17:40:21 -0400239 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400240 for _, childDepsetId := range transitiveDepsetIds {
241 childDepset, exists := depsetIdToDepset[childDepsetId]
242 if !exists {
243 return AqueryDepset{}, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
244 }
245 childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
246 if err != nil {
247 return AqueryDepset{}, err
248 }
249 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
250 }
251 aqueryDepset := AqueryDepset{
252 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
253 DirectArtifacts: directArtifactPaths,
254 TransitiveDepSetHashes: childDepsetHashes,
255 }
256 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
257 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
258 return aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400259}
260
Chris Parsons1a7aca02022-04-25 22:35:15 -0400261// getInputPaths flattens the depsets of the given IDs and returns all transitive
262// input paths contained in these depsets.
263// This is a potentially expensive operation, and should not be invoked except
264// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400265func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
266 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400267
268 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400269 depset := a.depsetIdToAqueryDepset[inputDepSetId]
270 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400271 if err != nil {
272 return nil, err
273 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400274 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400275 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400276 }
277 }
Wei Li455ba832021-11-04 22:58:12 +0000278
Chris Parsons1a7aca02022-04-25 22:35:15 -0400279 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400280}
281
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400282func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
283 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400284 return result, nil
285 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400286 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
287 result := depset.DirectArtifacts
288 for _, childHash := range depset.TransitiveDepSetHashes {
289 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400290 if err != nil {
291 return nil, err
292 }
293 result = append(result, childArtifactIds...)
294 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400295 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400296 return result, nil
297 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400298 return nil, fmt.Errorf("undefined input depset hash %d", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400299 }
300}
301
Chris Parsons1a7aca02022-04-25 22:35:15 -0400302// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400303// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400304// action graph, as described by the given action graph json proto.
305// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
306// are one-to-one with Bazel's depSetOfFiles objects.
307func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400308 var aqueryResult actionGraphContainer
309 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
310 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400311 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400312 }
313 aqueryHandler, err := newAqueryHandler(aqueryResult)
314 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400315 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400316 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500317
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400318 var buildStatements []BuildStatement
319
Chris Parsons8d6e4332021-02-22 16:13:50 -0500320 for _, actionEntry := range aqueryResult.Actions {
321 if shouldSkipAction(actionEntry) {
322 continue
323 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400324
Chris Parsons1a7aca02022-04-25 22:35:15 -0400325 var buildStatement BuildStatement
Liz Kammerc49e6822021-06-08 15:04:11 -0400326 if isSymlinkAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400327 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000328 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400329 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000330 } else if isPythonZipperAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400331 buildStatement, err = aqueryHandler.pythonZipperActionBuildStatement(actionEntry, buildStatements)
Liz Kammerc49e6822021-06-08 15:04:11 -0400332 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400333 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
334 } else {
335 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
336 }
337
338 if err != nil {
339 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500340 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500341 buildStatements = append(buildStatements, buildStatement)
342 }
343
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400344 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400345 var depsets []AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400346 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
347 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
348 // Two depsets collide on hash. Ensure that their contents are identical.
349 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
350 return nil, nil, fmt.Errorf("Two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
351 }
352 } else {
353 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
354 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400355 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400356 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400357
358 // Build Statements and depsets must be sorted by their content hash to
359 // preserve determinism between builds (this will result in consistent ninja file
360 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
361 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
362 sort.Slice(buildStatements, func(i, j int) bool {
363 // For build statements, compare output lists. In Bazel, each output file
364 // may only have one action which generates it, so this will provide
365 // a deterministic ordering.
366 outputs_i := buildStatements[i].OutputPaths
367 outputs_j := buildStatements[j].OutputPaths
368 if len(outputs_i) != len(outputs_j) {
369 return len(outputs_i) < len(outputs_j)
370 }
371 if len(outputs_i) == 0 {
372 // No outputs for these actions, so compare commands.
373 return buildStatements[i].Command < buildStatements[j].Command
374 }
375 // There may be multiple outputs, but the output ordering is deterministic.
376 return outputs_i[0] < outputs_j[0]
377 })
378 sort.Slice(depsets, func(i, j int) bool {
379 return depsets[i].ContentHash < depsets[j].ContentHash
380 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400381 return buildStatements, depsets, nil
382}
383
// depsetContentHash computes and returns a hex-encoded SHA256 checksum of the
// contents of the given depset. This content hash may serve as the depset's
// identifier. Using a content hash for an identifier is superior for
// determinism. (For example, using an integer identifier which depends on the
// order in which the depsets are created would result in nondeterministic
// depset IDs.)
//
// The hashed encoding is the direct paths joined by newlines, a NUL byte, and
// the transitive depset hashes joined by newlines. The NUL byte separates the
// two sections so that an entry cannot move from one section to the other
// without changing the hash.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	h := sha256.New()
	// Use newline as delimiter within a section, as paths cannot contain newline.
	h.Write([]byte(strings.Join(directPaths, "\n")))
	// Separate the sections; without this ("ab", no children) and ("a", child
	// "b") would produce the same hash input.
	h.Write([]byte{0})
	h.Write([]byte(strings.Join(transitiveDepsetHashes, "\n")))
	return fmt.Sprintf("%x", h.Sum(nil))
}
397
Usta Shrestha6298cc52022-05-27 17:40:21 -0400398func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
399 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400400 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400401 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400402 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400403 } else {
404 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400405 }
406 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400407 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400408}
409
Usta Shresthac2372492022-05-27 10:45:00 -0400410func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400411 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400412 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400413 if err != nil {
414 return BuildStatement{}, err
415 }
Usta Shresthac2372492022-05-27 10:45:00 -0400416 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400417 if err != nil {
418 return BuildStatement{}, err
419 }
420
421 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400422 Command: command,
423 Depfile: depfile,
424 OutputPaths: outputPaths,
425 InputDepsetHashes: inputDepsetHashes,
426 Env: actionEntry.EnvironmentVariables,
427 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400428 }
429 return buildStatement, nil
430}
431
Usta Shresthac2372492022-05-27 10:45:00 -0400432func (a *aqueryArtifactHandler) pythonZipperActionBuildStatement(actionEntry action, prevBuildStatements []BuildStatement) (BuildStatement, error) {
433 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400434 if err != nil {
435 return BuildStatement{}, err
436 }
Usta Shresthac2372492022-05-27 10:45:00 -0400437 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400438 if err != nil {
439 return BuildStatement{}, err
440 }
441
442 if len(inputPaths) < 1 || len(outputPaths) != 1 {
443 return BuildStatement{}, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
444 }
445 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
446 inputPaths, command = removePy3wrapperScript(inputPaths, command)
447 command = addCommandForPyBinaryRunfilesDir(command, inputPaths[0], outputPaths[0])
448 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
449 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
450 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
451 //
452 // The following logic relies on that Bazel aquery output returns actions in the order that
453 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
454 // in that order, the following logic might not find the build statement generated for Python binary
455 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
456 // See go/python-binary-host-mixed-build for more details.
457 pythonZipFilePath := outputPaths[0]
458 pyBinaryFound := false
Usta Shrestha6298cc52022-05-27 17:40:21 -0400459 for i := range prevBuildStatements {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400460 if len(prevBuildStatements[i].OutputPaths) == 1 && prevBuildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
461 prevBuildStatements[i].InputPaths = append(prevBuildStatements[i].InputPaths, pythonZipFilePath)
462 pyBinaryFound = true
463 }
464 }
465 if !pyBinaryFound {
466 return BuildStatement{}, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
467 }
468
469 buildStatement := BuildStatement{
470 Command: command,
471 Depfile: depfile,
472 OutputPaths: outputPaths,
473 InputPaths: inputPaths,
474 Env: actionEntry.EnvironmentVariables,
475 Mnemonic: actionEntry.Mnemonic,
476 }
477 return buildStatement, nil
478}
479
Usta Shresthac2372492022-05-27 10:45:00 -0400480func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
481 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400482 if err != nil {
483 return BuildStatement{}, err
484 }
485 if len(outputPaths) != 1 {
486 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
487 }
488 expandedTemplateContent := expandTemplateContent(actionEntry)
489 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
490 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
491 // change \n to space and mess up the format of Python programs.
492 // sed is used to convert \\n back to \n before saving to output file.
493 // See go/python-binary-host-mixed-build for more details.
494 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
495 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400496 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400497 if err != nil {
498 return BuildStatement{}, err
499 }
500
501 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400502 Command: command,
503 Depfile: depfile,
504 OutputPaths: outputPaths,
505 InputDepsetHashes: inputDepsetHashes,
506 Env: actionEntry.EnvironmentVariables,
507 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400508 }
509 return buildStatement, nil
510}
511
Usta Shresthac2372492022-05-27 10:45:00 -0400512func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
513 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400514 if err != nil {
515 return BuildStatement{}, err
516 }
517
Usta Shresthac2372492022-05-27 10:45:00 -0400518 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400519 if err != nil {
520 return BuildStatement{}, err
521 }
522 if len(inputPaths) != 1 || len(outputPaths) != 1 {
523 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
524 }
525 out := outputPaths[0]
526 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
527 out = proptools.ShellEscapeIncludingSpaces(out)
528 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
529 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
530 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
531 symlinkPaths := outputPaths[:]
532
533 buildStatement := BuildStatement{
534 Command: command,
535 Depfile: depfile,
536 OutputPaths: outputPaths,
537 InputPaths: inputPaths,
538 Env: actionEntry.EnvironmentVariables,
539 Mnemonic: actionEntry.Mnemonic,
540 SymlinkPaths: symlinkPaths,
541 }
542 return buildStatement, nil
543}
544
Usta Shresthac2372492022-05-27 10:45:00 -0400545func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400546 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400547 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400548 if !exists {
549 err = fmt.Errorf("undefined outputId %d", outputId)
550 return
551 }
552 ext := filepath.Ext(outputPath)
553 if ext == ".d" {
554 if depfile != nil {
555 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
556 return
557 } else {
558 depfile = &outputPath
559 }
560 } else {
561 outputPaths = append(outputPaths, outputPath)
562 }
563 }
564 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500565}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500566
Wei Li455ba832021-11-04 22:58:12 +0000567// expandTemplateContent substitutes the tokens in a template.
568func expandTemplateContent(actionEntry action) string {
569 replacerString := []string{}
570 for _, pair := range actionEntry.Substitutions {
571 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400572 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000573 value = val
574 }
575 replacerString = append(replacerString, pair.Key, value)
576 }
577 replacer := strings.NewReplacer(replacerString...)
578 return replacer.Replace(actionEntry.TemplateContent)
579}
580
// escapeCommandlineArgument escapes str with the following replacements,
// applied in a single pass:
//
//	\        ->  \\
//	$        ->  \$
//	`        ->  \`
//	"        ->  \"
//	newline  ->  \n (a backslash followed by the letter n)
//	'        ->  '"'"'
func escapeCommandlineArgument(str string) string {
	oldNewPairs := []string{
		`\`, `\\`,
		`$`, `\$`,
		"`", "\\`",
		`"`, `\"`,
		"\n", "\\n",
		`'`, `'"'"'`,
	}
	return strings.NewReplacer(oldNewPairs...).Replace(str)
}
593
594// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
595// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
596// there is no action returned by aquery for creating it. So in mixed build "python3" is used
597// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
598// removed from input paths and command of creating python zip file.
599// See go/python-binary-host-mixed-build for more details.
600// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400601func removePy3wrapperScript(inputPaths []string, command string) (newInputPaths []string, newCommand string) {
Wei Li455ba832021-11-04 22:58:12 +0000602 // Remove from inputs
603 filteredInputPaths := []string{}
Chris Parsons1a7aca02022-04-25 22:35:15 -0400604 for _, path := range inputPaths {
Wei Li455ba832021-11-04 22:58:12 +0000605 if !strings.HasSuffix(path, py3wrapperFileName) {
606 filteredInputPaths = append(filteredInputPaths, path)
607 }
608 }
609 newInputPaths = filteredInputPaths
610
611 // Remove from command line
612 var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400613 newCommand = re.ReplaceAllString(command, "")
Wei Li455ba832021-11-04 22:58:12 +0000614 return
615}
616
// addCommandForPyBinaryRunfilesDir appends to oldCommand the commands that create the python
// binary runfiles directory.
// The runfiles directory would normally be created from the MANIFEST file, which is the output of
// the SourceSymlinkManifest action found in the aquery output of Bazel py_binary targets. That
// action does not carry sufficient information to create the MANIFEST file, which in turn blocks
// the creation of the runfiles directory, so the directory is populated by unzipping the python
// zip file instead.
//
// zipperCommandPath is the zipper tool to invoke and zipFilePath is the zip file to extract; the
// runfiles directory name is zipFilePath with a trailing ".zip" replaced by ".runfiles".
// See go/python-binary-host-mixed-build for more details.
// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
func addCommandForPyBinaryRunfilesDir(oldCommand string, zipperCommandPath, zipFilePath string) string {
	// zipFilePath is expected to look like <python_binary>.zip. TrimSuffix removes the
	// extension only when it is actually present, so short or unexpected paths cannot cause an
	// out-of-range slice or lose unrelated trailing characters.
	runfilesDirName := strings.TrimSuffix(zipFilePath, ".zip") + ".runfiles"
	// Unzip the zip file into the runfiles directory.
	command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName)
	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
	// when running the python binary stub script.
	command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
	return oldCommand + " && " + command
}
633
Liz Kammerc49e6822021-06-08 15:04:11 -0400634func isSymlinkAction(a action) bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000635 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400636}
637
Wei Li455ba832021-11-04 22:58:12 +0000638func isTemplateExpandAction(a action) bool {
639 return a.Mnemonic == "TemplateExpand"
640}
641
642func isPythonZipperAction(a action) bool {
643 return a.Mnemonic == "PythonZipper"
644}
645
Chris Parsons8d6e4332021-02-22 16:13:50 -0500646func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400647 // TODO(b/180945121): Handle complex symlink actions.
648 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500649 return true
650 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400651 // Middleman actions are not handled like other actions; they are handled separately as a
652 // preparatory step so that their inputs may be relayed to actions depending on middleman
653 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500654 if a.Mnemonic == "Middleman" {
655 return true
656 }
657 // Skip "Fail" actions, which are placeholder actions designed to always fail.
658 if a.Mnemonic == "Fail" {
659 return true
660 }
661 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
662 // about the contents that are written.
663 if a.Mnemonic == "FileWrite" {
664 return true
665 }
666 return false
667}
668
Usta Shrestha6298cc52022-05-27 17:40:21 -0400669func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
670 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500671 currId := id
672 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
673 for currId > 0 {
674 currFragment, ok := pathFragmentsMap[currId]
675 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500676 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500677 }
678 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400679 if currId == currFragment.ParentId {
680 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
681 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500682 currId = currFragment.ParentId
683 }
684 return filepath.Join(labels...), nil
685}