blob: 1d1f49cdf004f61c5054238c8e7073a2113358a8 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050020 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050021 "fmt"
22 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "reflect"
Wei Li455ba832021-11-04 22:58:12 +000024 "regexp"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040025 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050026 "strings"
27
28 "github.com/google/blueprint/proptools"
29)
30
// Distinct integer types for the three kinds of numeric identifier that appear
// in an aquery response. Giving each kind its own type means one cannot be
// passed where another is expected without an explicit conversion.
type (
	artifactId     int
	depsetId       int
	pathFragmentId int
)
34
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050035// artifact contains relevant portions of Bazel's aquery proto, Artifact.
36// Represents a single artifact, whether it's a source file or a derived output file.
37type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040038 Id artifactId
39 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050040}
41
42type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040043 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050044 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040045 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050046}
47
// KeyValuePair mirrors Bazel's aquery KeyValuePair proto: a single string key
// with its string value. In this file it carries action environment variables
// and template substitutions.
type KeyValuePair struct {
	Key   string
	Value string
}
53
// AqueryDepset is a depset definition derived from Bazel's aquery response.
// It corresponds to `depSetOfFiles` in the response proto, with two changes:
//   - direct artifacts are listed by full path rather than by ID;
//   - the depset is identified by a hash of its contents rather than by an
//     integer ID, which keeps the identifier deterministic.
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	ContentHash            string
	DirectArtifacts        []string
	TransitiveDepSetHashes []string
}
66
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050067// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
68// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
69// data structure for storing large numbers of file paths.
70type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040071 Id depsetId
72 DirectArtifactIds []artifactId
73 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050074}
75
76// action contains relevant portions of Bazel's aquery proto, Action.
77// Represents a single command line invocation in the Bazel build graph.
78type action struct {
79 Arguments []string
80 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040081 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050082 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040083 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000084 TemplateContent string
85 Substitutions []KeyValuePair
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050086}
87
88// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
89// An aquery response from Bazel contains a single ActionGraphContainer proto.
90type actionGraphContainer struct {
91 Artifacts []artifact
92 Actions []action
93 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050094 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050095}
96
97// BuildStatement contains information to register a build statement corresponding (one to one)
98// with a Bazel action from Bazel's action graph.
99type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -0400100 Command string
101 Depfile *string
102 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400103 SymlinkPaths []string
104 Env []KeyValuePair
105 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400106
107 // Inputs of this build statement, either as unexpanded depsets or expanded
108 // input paths. There should be no overlap between these fields; an input
109 // path should either be included as part of an unexpanded depset or a raw
110 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400111 InputDepsetHashes []string
112 InputPaths []string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500113}
114
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400115// A helper type for aquery processing which facilitates retrieval of path IDs from their
116// less readable Bazel structures (depset and path fragment).
117type aqueryArtifactHandler struct {
Usta Shresthaef922252022-06-02 14:23:02 -0400118 // Switches to true if any depset contains only `bazelToolsDependencySentinel`
119 bazelToolsDependencySentinelNeeded bool
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400120 // Maps depset id to AqueryDepset, a representation of depset which is
121 // post-processed for middleman artifact handling, unhandled artifact
122 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400123 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400124 // Maps content hash to AqueryDepset.
125 depsetHashToAqueryDepset map[string]AqueryDepset
126
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400127 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
128 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400129 depsetHashToArtifactPathsCache map[string][]string
Usta Shrestha6298cc52022-05-27 17:40:21 -0400130 // Maps artifact ids to fully expanded paths.
131 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400132}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500133
// templateActionOverriddenTokens lists template tokens whose replacement value
// is fixed here and takes precedence over whatever the TemplateExpand action
// reports in its 'substitutions'.
var templateActionOverriddenTokens = map[string]string{
	// aquery reports "py3wrapper.sh" for %python_binary%; "python3" is used
	// instead. See removePy3wrapperScript.
	"%python_binary%": "python3",
}
141
// manifestFilePattern matches the path of the MANIFEST file created for a
// py_binary target, i.e. a path ending in "<name>.runfiles/MANIFEST" below
// some directory.
var manifestFilePattern = regexp.MustCompile(".*/.+\\.runfiles/MANIFEST$")

const (
	// py3wrapperFileName is the file name, with leading slash, of the
	// py3wrapper.sh script used by py_binary targets.
	py3wrapperFileName = "/py3wrapper.sh"

	// bazelToolsDependencySentinel names a placeholder file that is put into
	// depsets that would otherwise be empty.
	bazelToolsDependencySentinel = "BAZEL_TOOLS_DEPENDENCY_SENTINEL"
)
150
// indexBy builds a lookup table over values, keyed by keyFn applied to each
// element. If several elements produce the same key, the one that appears
// last in values is the one kept. The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for i := range values {
		index[keyFn(values[i])] = values[i]
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400158
Usta Shrestha6298cc52022-05-27 17:40:21 -0400159func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
160 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
161 return pf.Id
162 })
163
164 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500165 for _, artifact := range aqueryResult.Artifacts {
166 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
167 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500168 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500169 }
170 artifactIdToPath[artifact.Id] = artifactPath
171 }
Chris Parsons943f2432021-01-19 11:36:50 -0500172
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400173 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400174 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400175 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400176 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
177 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400178 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500179 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500180 if actionEntry.Mnemonic == "Middleman" {
181 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400182 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500183 }
184 }
185 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400186
Usta Shrestha6298cc52022-05-27 17:40:21 -0400187 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
188 return d.Id
189 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400190
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400191 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400192 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400193 depsetHashToAqueryDepset: map[string]AqueryDepset{},
194 depsetHashToArtifactPathsCache: map[string][]string{},
195 artifactIdToPath: artifactIdToPath,
196 }
197
198 // Validate and adjust aqueryResult.DepSetOfFiles values.
199 for _, depset := range aqueryResult.DepSetOfFiles {
200 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
201 if err != nil {
202 return nil, err
203 }
204 }
205
206 return &aqueryHandler, nil
207}
208
209// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
210// depset.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400211func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400212 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
213 return aqueryDepset, nil
214 }
215 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400216 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400217 for _, artifactId := range depset.DirectArtifactIds {
218 path, pathExists := a.artifactIdToPath[artifactId]
219 if !pathExists {
220 return AqueryDepset{}, fmt.Errorf("undefined input artifactId %d", artifactId)
221 }
222 // Filter out any inputs which are universally dropped, and swap middleman
223 // artifacts with their corresponding depsets.
224 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
225 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
226 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400227 } else if strings.HasSuffix(path, py3wrapperFileName) ||
228 manifestFilePattern.MatchString(path) ||
229 strings.HasPrefix(path, "../bazel_tools") {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400230 // Drop these artifacts.
231 // See go/python-binary-host-mixed-build for more details.
232 // 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
233 // Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
234 // 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
235 // but it doesn't contain sufficient information so no Ninja build statements are generated
236 // for creating it.
237 // So in mixed build mode, when these two are used as input of some Ninja build statement,
238 // since there is no build statement to create them, they should be removed from input paths.
239 // TODO(b/197135294): Clean up this custom runfiles handling logic when
240 // SourceSymlinkManifest and SymlinkTree actions are supported.
Usta Shresthaef922252022-06-02 14:23:02 -0400241 // 3) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
242 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400243 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400244 directArtifactPaths = append(directArtifactPaths, path)
245 }
246 }
247
Usta Shrestha6298cc52022-05-27 17:40:21 -0400248 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400249 for _, childDepsetId := range transitiveDepsetIds {
250 childDepset, exists := depsetIdToDepset[childDepsetId]
251 if !exists {
252 return AqueryDepset{}, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
253 }
254 childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
255 if err != nil {
256 return AqueryDepset{}, err
257 }
258 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
259 }
Usta Shresthaef922252022-06-02 14:23:02 -0400260 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
261 // We could omit this depset altogether but that requires cleanup on
262 // transitive dependents.
263 // As a simpler alternative, we use this sentinel file as a dependency.
264 directArtifactPaths = append(directArtifactPaths, bazelToolsDependencySentinel)
265 a.bazelToolsDependencySentinelNeeded = true
266 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400267 aqueryDepset := AqueryDepset{
268 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
269 DirectArtifacts: directArtifactPaths,
270 TransitiveDepSetHashes: childDepsetHashes,
271 }
272 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
273 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
274 return aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400275}
276
Chris Parsons1a7aca02022-04-25 22:35:15 -0400277// getInputPaths flattens the depsets of the given IDs and returns all transitive
278// input paths contained in these depsets.
279// This is a potentially expensive operation, and should not be invoked except
280// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400281func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
282 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400283
284 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400285 depset := a.depsetIdToAqueryDepset[inputDepSetId]
286 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400287 if err != nil {
288 return nil, err
289 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400290 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400291 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400292 }
293 }
Wei Li455ba832021-11-04 22:58:12 +0000294
Chris Parsons1a7aca02022-04-25 22:35:15 -0400295 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400296}
297
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400298func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
299 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400300 return result, nil
301 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400302 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
303 result := depset.DirectArtifacts
304 for _, childHash := range depset.TransitiveDepSetHashes {
305 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400306 if err != nil {
307 return nil, err
308 }
309 result = append(result, childArtifactIds...)
310 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400311 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400312 return result, nil
313 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400314 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400315 }
316}
317
Chris Parsons1a7aca02022-04-25 22:35:15 -0400318// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400319// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400320// action graph, as described by the given action graph json proto.
321// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
322// are one-to-one with Bazel's depSetOfFiles objects.
323func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400324 var aqueryResult actionGraphContainer
325 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
326 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400327 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400328 }
329 aqueryHandler, err := newAqueryHandler(aqueryResult)
330 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400331 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400332 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500333
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400334 var buildStatements []BuildStatement
Usta Shresthaef922252022-06-02 14:23:02 -0400335 if aqueryHandler.bazelToolsDependencySentinelNeeded {
336 buildStatements = append(buildStatements, BuildStatement{
337 Command: fmt.Sprintf("touch '%s'", bazelToolsDependencySentinel),
338 OutputPaths: []string{bazelToolsDependencySentinel},
339 Mnemonic: bazelToolsDependencySentinel,
340 })
341 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400342
Chris Parsons8d6e4332021-02-22 16:13:50 -0500343 for _, actionEntry := range aqueryResult.Actions {
344 if shouldSkipAction(actionEntry) {
345 continue
346 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400347
Chris Parsons1a7aca02022-04-25 22:35:15 -0400348 var buildStatement BuildStatement
Liz Kammerc49e6822021-06-08 15:04:11 -0400349 if isSymlinkAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400350 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000351 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400352 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Wei Li455ba832021-11-04 22:58:12 +0000353 } else if isPythonZipperAction(actionEntry) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400354 buildStatement, err = aqueryHandler.pythonZipperActionBuildStatement(actionEntry, buildStatements)
Liz Kammerc49e6822021-06-08 15:04:11 -0400355 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400356 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
357 } else {
358 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
359 }
360
361 if err != nil {
362 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500363 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500364 buildStatements = append(buildStatements, buildStatement)
365 }
366
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400367 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400368 var depsets []AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400369 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
370 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
371 // Two depsets collide on hash. Ensure that their contents are identical.
372 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
Usta Shrestha16ac1352022-06-22 11:01:55 -0400373 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400374 }
375 } else {
376 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
377 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400378 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400379 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400380
381 // Build Statements and depsets must be sorted by their content hash to
382 // preserve determinism between builds (this will result in consistent ninja file
383 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
384 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
385 sort.Slice(buildStatements, func(i, j int) bool {
386 // For build statements, compare output lists. In Bazel, each output file
387 // may only have one action which generates it, so this will provide
388 // a deterministic ordering.
389 outputs_i := buildStatements[i].OutputPaths
390 outputs_j := buildStatements[j].OutputPaths
391 if len(outputs_i) != len(outputs_j) {
392 return len(outputs_i) < len(outputs_j)
393 }
394 if len(outputs_i) == 0 {
395 // No outputs for these actions, so compare commands.
396 return buildStatements[i].Command < buildStatements[j].Command
397 }
398 // There may be multiple outputs, but the output ordering is deterministic.
399 return outputs_i[0] < outputs_j[0]
400 })
401 sort.Slice(depsets, func(i, j int) bool {
402 return depsets[i].ContentHash < depsets[j].ContentHash
403 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400404 return buildStatements, depsets, nil
405}
406
// depsetContentHash returns a SHA256 checksum over the contents of a depset,
// encoded as unpadded URL-safe base64. The checksum may serve as the depset's
// identifier. A content hash is preferable to, say, an integer assigned in
// creation order, because it does not depend on the order in which depsets
// happen to be created and is therefore deterministic.
//
// The hashed data is the direct paths joined by newlines, followed
// immediately by the transitive depset hashes concatenated with no separator.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	// Use newline as delimiter, as paths cannot contain newline.
	content := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(content))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
420
Usta Shrestha6298cc52022-05-27 17:40:21 -0400421func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
422 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400423 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400424 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400425 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400426 } else {
427 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400428 }
429 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400430 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400431}
432
Usta Shresthac2372492022-05-27 10:45:00 -0400433func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400434 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400435 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400436 if err != nil {
437 return BuildStatement{}, err
438 }
Usta Shresthac2372492022-05-27 10:45:00 -0400439 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400440 if err != nil {
441 return BuildStatement{}, err
442 }
443
444 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400445 Command: command,
446 Depfile: depfile,
447 OutputPaths: outputPaths,
448 InputDepsetHashes: inputDepsetHashes,
449 Env: actionEntry.EnvironmentVariables,
450 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400451 }
452 return buildStatement, nil
453}
454
Usta Shresthac2372492022-05-27 10:45:00 -0400455func (a *aqueryArtifactHandler) pythonZipperActionBuildStatement(actionEntry action, prevBuildStatements []BuildStatement) (BuildStatement, error) {
456 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400457 if err != nil {
458 return BuildStatement{}, err
459 }
Usta Shresthac2372492022-05-27 10:45:00 -0400460 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400461 if err != nil {
462 return BuildStatement{}, err
463 }
464
465 if len(inputPaths) < 1 || len(outputPaths) != 1 {
466 return BuildStatement{}, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
467 }
468 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
469 inputPaths, command = removePy3wrapperScript(inputPaths, command)
Usta Shresthaef922252022-06-02 14:23:02 -0400470 command = addCommandForPyBinaryRunfilesDir(command, outputPaths[0])
Chris Parsons1a7aca02022-04-25 22:35:15 -0400471 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
472 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
473 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
474 //
475 // The following logic relies on that Bazel aquery output returns actions in the order that
476 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
477 // in that order, the following logic might not find the build statement generated for Python binary
478 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
479 // See go/python-binary-host-mixed-build for more details.
480 pythonZipFilePath := outputPaths[0]
481 pyBinaryFound := false
Usta Shrestha6298cc52022-05-27 17:40:21 -0400482 for i := range prevBuildStatements {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400483 if len(prevBuildStatements[i].OutputPaths) == 1 && prevBuildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
484 prevBuildStatements[i].InputPaths = append(prevBuildStatements[i].InputPaths, pythonZipFilePath)
485 pyBinaryFound = true
486 }
487 }
488 if !pyBinaryFound {
489 return BuildStatement{}, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
490 }
491
492 buildStatement := BuildStatement{
493 Command: command,
494 Depfile: depfile,
495 OutputPaths: outputPaths,
496 InputPaths: inputPaths,
497 Env: actionEntry.EnvironmentVariables,
498 Mnemonic: actionEntry.Mnemonic,
499 }
500 return buildStatement, nil
501}
502
Usta Shresthac2372492022-05-27 10:45:00 -0400503func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
504 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400505 if err != nil {
506 return BuildStatement{}, err
507 }
508 if len(outputPaths) != 1 {
509 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
510 }
511 expandedTemplateContent := expandTemplateContent(actionEntry)
512 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
513 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
514 // change \n to space and mess up the format of Python programs.
515 // sed is used to convert \\n back to \n before saving to output file.
516 // See go/python-binary-host-mixed-build for more details.
517 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
518 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400519 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400520 if err != nil {
521 return BuildStatement{}, err
522 }
523
524 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400525 Command: command,
526 Depfile: depfile,
527 OutputPaths: outputPaths,
528 InputDepsetHashes: inputDepsetHashes,
529 Env: actionEntry.EnvironmentVariables,
530 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400531 }
532 return buildStatement, nil
533}
534
Usta Shresthac2372492022-05-27 10:45:00 -0400535func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
536 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400537 if err != nil {
538 return BuildStatement{}, err
539 }
540
Usta Shresthac2372492022-05-27 10:45:00 -0400541 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400542 if err != nil {
543 return BuildStatement{}, err
544 }
545 if len(inputPaths) != 1 || len(outputPaths) != 1 {
546 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
547 }
548 out := outputPaths[0]
549 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
550 out = proptools.ShellEscapeIncludingSpaces(out)
551 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
552 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
553 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
554 symlinkPaths := outputPaths[:]
555
556 buildStatement := BuildStatement{
557 Command: command,
558 Depfile: depfile,
559 OutputPaths: outputPaths,
560 InputPaths: inputPaths,
561 Env: actionEntry.EnvironmentVariables,
562 Mnemonic: actionEntry.Mnemonic,
563 SymlinkPaths: symlinkPaths,
564 }
565 return buildStatement, nil
566}
567
Usta Shresthac2372492022-05-27 10:45:00 -0400568func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400569 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400570 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400571 if !exists {
572 err = fmt.Errorf("undefined outputId %d", outputId)
573 return
574 }
575 ext := filepath.Ext(outputPath)
576 if ext == ".d" {
577 if depfile != nil {
578 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
579 return
580 } else {
581 depfile = &outputPath
582 }
583 } else {
584 outputPaths = append(outputPaths, outputPath)
585 }
586 }
587 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500588}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500589
Wei Li455ba832021-11-04 22:58:12 +0000590// expandTemplateContent substitutes the tokens in a template.
591func expandTemplateContent(actionEntry action) string {
592 replacerString := []string{}
593 for _, pair := range actionEntry.Substitutions {
594 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400595 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000596 value = val
597 }
598 replacerString = append(replacerString, pair.Key, value)
599 }
600 replacer := strings.NewReplacer(replacerString...)
601 return replacer.Replace(actionEntry.TemplateContent)
602}
603
// commandlineArgumentEscaper holds the substitutions applied by escapeCommandlineArgument.
// It is built once at package initialization instead of on every call; a strings.Replacer
// is safe for concurrent use by multiple goroutines.
var commandlineArgumentEscaper = strings.NewReplacer(
	// \->\\, $->\$, `->\`, "->\", \n->\\n, '->'"'"'
	`\`, `\\`,
	`$`, `\$`,
	"`", "\\`",
	`"`, `\"`,
	"\n", "\\n",
	`'`, `'"'"'`,
)

// escapeCommandlineArgument returns str with shell-significant characters escaped:
// backslash, dollar sign, backtick and double quote are prefixed with a backslash, a
// newline becomes the two characters `\n`, and a single quote becomes `'"'"'`.
//
// All substitutions happen in a single pass over str, so text produced by one
// replacement is never escaped a second time.
//
// NOTE(review): `'"'"'` is the usual "close quote, quoted quote, reopen quote" sequence,
// which suggests the result is meant to sit inside a single-quoted shell string
// containing a double-quoted argument; confirm against the call site.
func escapeCommandlineArgument(str string) string {
	return commandlineArgumentEscaper.Replace(str)
}
616
617// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
618// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
619// there is no action returned by aquery for creating it. So in mixed build "python3" is used
620// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
621// removed from input paths and command of creating python zip file.
622// See go/python-binary-host-mixed-build for more details.
623// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400624func removePy3wrapperScript(inputPaths []string, command string) (newInputPaths []string, newCommand string) {
Wei Li455ba832021-11-04 22:58:12 +0000625 // Remove from inputs
Usta Shresthaef922252022-06-02 14:23:02 -0400626 var filteredInputPaths []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400627 for _, path := range inputPaths {
Wei Li455ba832021-11-04 22:58:12 +0000628 if !strings.HasSuffix(path, py3wrapperFileName) {
629 filteredInputPaths = append(filteredInputPaths, path)
630 }
631 }
632 newInputPaths = filteredInputPaths
633
634 // Remove from command line
635 var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400636 newCommand = re.ReplaceAllString(command, "")
Wei Li455ba832021-11-04 22:58:12 +0000637 return
638}
639
// addCommandForPyBinaryRunfilesDir appends to oldCommand the commands that create the python
// binary runfiles directory.
// The runfiles directory would normally be created from the MANIFEST file, which is the output
// of the SourceSymlinkManifest action in the aquery output of Bazel py_binary targets. Since
// SourceSymlinkManifest doesn't contain sufficient information, the MANIFEST file cannot be
// created, which also blocks the creation of the runfiles directory that way.
// See go/python-binary-host-mixed-build for more details.
// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
//
// zipFilePath is expected to look like <python_binary>.zip. The returned command is
// oldCommand followed by " && "-joined steps that extract the zip into
// <python_binary>.runfiles and then create the runfiles/__main__ symlink inside it.
func addCommandForPyBinaryRunfilesDir(oldCommand string, zipFilePath string) string {
	// Derive <python_binary>.runfiles from <python_binary>.zip. TrimSuffix removes only a
	// real ".zip" extension, so a path shorter than four bytes cannot cause an
	// out-of-range slice and a path without the extension is left intact.
	runfilesDirName := strings.TrimSuffix(zipFilePath, ".zip") + ".runfiles"
	// Unzip the zip file.
	command := fmt.Sprintf("%s x %s -d %s", "../bazel_tools/tools/zip/zipper/zipper", zipFilePath, runfilesDirName)
	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
	// when running the python binary stub script.
	command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
	return oldCommand + " && " + command
}
656
Liz Kammerc49e6822021-06-08 15:04:11 -0400657func isSymlinkAction(a action) bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000658 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400659}
660
Wei Li455ba832021-11-04 22:58:12 +0000661func isTemplateExpandAction(a action) bool {
662 return a.Mnemonic == "TemplateExpand"
663}
664
665func isPythonZipperAction(a action) bool {
666 return a.Mnemonic == "PythonZipper"
667}
668
Chris Parsons8d6e4332021-02-22 16:13:50 -0500669func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400670 // TODO(b/180945121): Handle complex symlink actions.
671 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500672 return true
673 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400674 // Middleman actions are not handled like other actions; they are handled separately as a
675 // preparatory step so that their inputs may be relayed to actions depending on middleman
676 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500677 if a.Mnemonic == "Middleman" {
678 return true
679 }
680 // Skip "Fail" actions, which are placeholder actions designed to always fail.
681 if a.Mnemonic == "Fail" {
682 return true
683 }
684 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
685 // about the contents that are written.
686 if a.Mnemonic == "FileWrite" {
687 return true
688 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700689 if a.Mnemonic == "BaselineCoverage" {
690 return true
691 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500692 return false
693}
694
Usta Shrestha6298cc52022-05-27 17:40:21 -0400695func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
696 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500697 currId := id
698 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
699 for currId > 0 {
700 currFragment, ok := pathFragmentsMap[currId]
701 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500702 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500703 }
704 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400705 if currId == currFragment.ParentId {
706 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
707 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500708 currId = currFragment.ParentId
709 }
710 return filepath.Join(labels...), nil
711}