blob: 6af472a8d7494c83538a9242e11fe90306fa0c89 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050024 "strings"
25
Liz Kammer690fbac2023-02-10 11:11:17 -050026 analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
27
28 "github.com/google/blueprint/metrics"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050029 "github.com/google/blueprint/proptools"
Jason Wu118fd2b2022-10-27 18:41:15 +000030 "google.golang.org/protobuf/proto"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050031)
32
// Identifier types for the records of an aquery response. Each kind of ID is
// a distinct defined type, so converting between kinds requires an explicit
// conversion.
type (
	// artifactId identifies an artifact.
	artifactId int
	// depsetId identifies a depSetOfFiles.
	depsetId int
	// pathFragmentId identifies a pathFragment.
	pathFragmentId int
)
36
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050037// artifact contains relevant portions of Bazel's aquery proto, Artifact.
38// Represents a single artifact, whether it's a source file or a derived output file.
39type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040040 Id artifactId
41 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050042}
43
44type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040045 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050046 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040047 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050048}
49
// KeyValuePair mirrors the KeyValuePair message in Bazel's aquery proto. It
// is used here for action environment variables and template substitutions.
type KeyValuePair struct {
	Key   string
	Value string
}
55
// AqueryDepset is the post-processed form of a depset from Bazel's aquery
// response. It corresponds to `depSetOfFiles` in the response proto, with two
// differences:
//   - direct artifacts are listed by full path rather than by ID
//   - the depset is identified by a hash of its contents rather than by an
//     int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset; see depsetContentHash.
	ContentHash string
	// DirectArtifacts holds the full paths of the artifacts directly in this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the ContentHash of each child depset.
	TransitiveDepSetHashes []string
}
69
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050070// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
71// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
72// data structure for storing large numbers of file paths.
73type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040074 Id depsetId
75 DirectArtifactIds []artifactId
76 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050077}
78
79// action contains relevant portions of Bazel's aquery proto, Action.
80// Represents a single command line invocation in the Bazel build graph.
81type action struct {
82 Arguments []string
83 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040084 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050085 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040086 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000087 TemplateContent string
88 Substitutions []KeyValuePair
Sasha Smundak1da064c2022-06-08 16:36:16 -070089 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050090}
91
92// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
93// An aquery response from Bazel contains a single ActionGraphContainer proto.
94type actionGraphContainer struct {
95 Artifacts []artifact
96 Actions []action
97 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050098 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050099}
100
101// BuildStatement contains information to register a build statement corresponding (one to one)
102// with a Bazel action from Bazel's action graph.
103type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -0400104 Command string
105 Depfile *string
106 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400107 SymlinkPaths []string
108 Env []KeyValuePair
109 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400110
111 // Inputs of this build statement, either as unexpanded depsets or expanded
112 // input paths. There should be no overlap between these fields; an input
113 // path should either be included as part of an unexpanded depset or a raw
114 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400115 InputDepsetHashes []string
116 InputPaths []string
Sasha Smundak1da064c2022-06-08 16:36:16 -0700117 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500118}
119
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400120// A helper type for aquery processing which facilitates retrieval of path IDs from their
121// less readable Bazel structures (depset and path fragment).
122type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400123 // Maps depset id to AqueryDepset, a representation of depset which is
124 // post-processed for middleman artifact handling, unhandled artifact
125 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400126 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500127 emptyDepsetIds map[depsetId]struct{}
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400128 // Maps content hash to AqueryDepset.
129 depsetHashToAqueryDepset map[string]AqueryDepset
130
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400131 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
132 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400133 depsetHashToArtifactPathsCache map[string][]string
Usta Shrestha6298cc52022-05-27 17:40:21 -0400134 // Maps artifact ids to fully expanded paths.
135 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400136}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500137
// templateActionOverriddenTokens lists template tokens whose replacement is
// fixed here, taking precedence over whatever the 'substitutions' of a
// TemplateExpand action say.
var templateActionOverriddenTokens = map[string]string{
	// aquery reports "py3wrapper.sh" for %python_binary%; "python3" is used
	// instead. See removePy3wrapperScript.
	"%python_binary%": "python3",
}
145
// py3wrapperFileName is the file name (with a leading slash) of the
// py3wrapper.sh script used by py_binary targets. Input paths ending in it
// are dropped from depsets.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000148
// indexBy builds a lookup table from values, keyed by keyFn applied to each
// element. When several elements produce the same key, the one that appears
// last in values wins. The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for i := range values {
		index[keyFn(values[i])] = values[i]
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400156
Usta Shrestha6298cc52022-05-27 17:40:21 -0400157func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
158 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
159 return pf.Id
160 })
161
162 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500163 for _, artifact := range aqueryResult.Artifacts {
164 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
165 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500166 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500167 }
168 artifactIdToPath[artifact.Id] = artifactPath
169 }
Chris Parsons943f2432021-01-19 11:36:50 -0500170
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400171 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400172 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400173 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400174 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
175 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400176 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500177 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500178 if actionEntry.Mnemonic == "Middleman" {
179 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400180 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500181 }
182 }
183 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400184
Usta Shrestha6298cc52022-05-27 17:40:21 -0400185 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
186 return d.Id
187 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400188
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400189 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400190 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400191 depsetHashToAqueryDepset: map[string]AqueryDepset{},
192 depsetHashToArtifactPathsCache: map[string][]string{},
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500193 emptyDepsetIds: make(map[depsetId]struct{}, 0),
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400194 artifactIdToPath: artifactIdToPath,
195 }
196
197 // Validate and adjust aqueryResult.DepSetOfFiles values.
198 for _, depset := range aqueryResult.DepSetOfFiles {
199 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
200 if err != nil {
201 return nil, err
202 }
203 }
204
205 return &aqueryHandler, nil
206}
207
208// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
209// depset.
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500210func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (*AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400211 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500212 return &aqueryDepset, nil
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400213 }
214 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400215 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400216 for _, artifactId := range depset.DirectArtifactIds {
217 path, pathExists := a.artifactIdToPath[artifactId]
218 if !pathExists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500219 return nil, fmt.Errorf("undefined input artifactId %d", artifactId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400220 }
221 // Filter out any inputs which are universally dropped, and swap middleman
222 // artifacts with their corresponding depsets.
223 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
224 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
225 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400226 } else if strings.HasSuffix(path, py3wrapperFileName) ||
Usta Shresthaef922252022-06-02 14:23:02 -0400227 strings.HasPrefix(path, "../bazel_tools") {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500228 continue
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400229 // Drop these artifacts.
230 // See go/python-binary-host-mixed-build for more details.
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700231 // 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
232 // TemplateExpandAction handles everything necessary to launch a Pythin application.
233 // 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
Usta Shresthaef922252022-06-02 14:23:02 -0400234 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400235 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400236 directArtifactPaths = append(directArtifactPaths, path)
237 }
238 }
239
Usta Shrestha6298cc52022-05-27 17:40:21 -0400240 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400241 for _, childDepsetId := range transitiveDepsetIds {
242 childDepset, exists := depsetIdToDepset[childDepsetId]
243 if !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500244 if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
245 continue
246 } else {
247 return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
248 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400249 }
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500250 if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
251 return nil, err
252 } else if childAqueryDepset == nil {
253 continue
254 } else {
255 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400256 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400257 }
Usta Shresthaef922252022-06-02 14:23:02 -0400258 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500259 a.emptyDepsetIds[depset.Id] = struct{}{}
260 return nil, nil
Usta Shresthaef922252022-06-02 14:23:02 -0400261 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400262 aqueryDepset := AqueryDepset{
263 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
264 DirectArtifacts: directArtifactPaths,
265 TransitiveDepSetHashes: childDepsetHashes,
266 }
267 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
268 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500269 return &aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400270}
271
Chris Parsons1a7aca02022-04-25 22:35:15 -0400272// getInputPaths flattens the depsets of the given IDs and returns all transitive
273// input paths contained in these depsets.
274// This is a potentially expensive operation, and should not be invoked except
275// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400276func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
277 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400278
279 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400280 depset := a.depsetIdToAqueryDepset[inputDepSetId]
281 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400282 if err != nil {
283 return nil, err
284 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400285 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400286 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400287 }
288 }
Wei Li455ba832021-11-04 22:58:12 +0000289
Chris Parsons1a7aca02022-04-25 22:35:15 -0400290 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400291}
292
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400293func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
294 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400295 return result, nil
296 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400297 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
298 result := depset.DirectArtifacts
299 for _, childHash := range depset.TransitiveDepSetHashes {
300 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400301 if err != nil {
302 return nil, err
303 }
304 result = append(result, childArtifactIds...)
305 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400306 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400307 return result, nil
308 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400309 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400310 }
311}
312
Chris Parsons1a7aca02022-04-25 22:35:15 -0400313// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400314// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400315// action graph, as described by the given action graph json proto.
316// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
317// are one-to-one with Bazel's depSetOfFiles objects.
Liz Kammer690fbac2023-02-10 11:11:17 -0500318func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]BuildStatement, []AqueryDepset, error) {
Jason Wu118fd2b2022-10-27 18:41:15 +0000319 aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
320 err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400321 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400322 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400323 }
Jason Wu118fd2b2022-10-27 18:41:15 +0000324 aqueryResult := actionGraphContainer{}
325
326 for _, protoArtifact := range aqueryProto.Artifacts {
327 aqueryResult.Artifacts = append(aqueryResult.Artifacts, artifact{artifactId(protoArtifact.Id),
328 pathFragmentId(protoArtifact.PathFragmentId)})
329 }
330
331 for _, protoAction := range aqueryProto.Actions {
332 var environmentVariable []KeyValuePair
333 var inputDepSetIds []depsetId
334 var outputIds []artifactId
335 var substitutions []KeyValuePair
336
337 for _, protoEnvironmentVariable := range protoAction.EnvironmentVariables {
338 environmentVariable = append(environmentVariable, KeyValuePair{
339 protoEnvironmentVariable.Key, protoEnvironmentVariable.Value,
340 })
341 }
342 for _, protoInputDepSetIds := range protoAction.InputDepSetIds {
343 inputDepSetIds = append(inputDepSetIds, depsetId(protoInputDepSetIds))
344 }
345 for _, protoOutputIds := range protoAction.OutputIds {
346 outputIds = append(outputIds, artifactId(protoOutputIds))
347 }
348 for _, protoSubstitutions := range protoAction.Substitutions {
349 substitutions = append(substitutions, KeyValuePair{
350 protoSubstitutions.Key, protoSubstitutions.Value,
351 })
352 }
353
354 aqueryResult.Actions = append(aqueryResult.Actions,
355 action{
356 Arguments: protoAction.Arguments,
357 EnvironmentVariables: environmentVariable,
358 InputDepSetIds: inputDepSetIds,
359 Mnemonic: protoAction.Mnemonic,
360 OutputIds: outputIds,
361 TemplateContent: protoAction.TemplateContent,
362 Substitutions: substitutions,
363 FileContents: protoAction.FileContents})
364 }
365
366 for _, protoDepSetOfFiles := range aqueryProto.DepSetOfFiles {
367 var directArtifactIds []artifactId
368 var transitiveDepSetIds []depsetId
369
370 for _, protoDirectArtifactIds := range protoDepSetOfFiles.DirectArtifactIds {
371 directArtifactIds = append(directArtifactIds, artifactId(protoDirectArtifactIds))
372 }
373 for _, protoTransitiveDepSetIds := range protoDepSetOfFiles.TransitiveDepSetIds {
374 transitiveDepSetIds = append(transitiveDepSetIds, depsetId(protoTransitiveDepSetIds))
375 }
376 aqueryResult.DepSetOfFiles = append(aqueryResult.DepSetOfFiles,
377 depSetOfFiles{
378 Id: depsetId(protoDepSetOfFiles.Id),
379 DirectArtifactIds: directArtifactIds,
380 TransitiveDepSetIds: transitiveDepSetIds})
381
382 }
383
384 for _, protoPathFragments := range aqueryProto.PathFragments {
385 aqueryResult.PathFragments = append(aqueryResult.PathFragments,
386 pathFragment{
387 Id: pathFragmentId(protoPathFragments.Id),
388 Label: protoPathFragments.Label,
389 ParentId: pathFragmentId(protoPathFragments.ParentId)})
390
391 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500392
Liz Kammer690fbac2023-02-10 11:11:17 -0500393 var aqueryHandler *aqueryArtifactHandler
394 {
395 eventHandler.Begin("init_handler")
396 defer eventHandler.End("init_handler")
397 aqueryHandler, err = newAqueryHandler(aqueryResult)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400398 if err != nil {
399 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500400 }
Liz Kammer690fbac2023-02-10 11:11:17 -0500401 }
402
403 var buildStatements []BuildStatement
404 {
405 eventHandler.Begin("build_statements")
406 defer eventHandler.End("build_statements")
407 for _, actionEntry := range aqueryResult.Actions {
408 if shouldSkipAction(actionEntry) {
409 continue
410 }
411
412 var buildStatement BuildStatement
413 if actionEntry.isSymlinkAction() {
414 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
415 } else if actionEntry.isTemplateExpandAction() && len(actionEntry.Arguments) < 1 {
416 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
417 } else if actionEntry.isFileWriteAction() {
418 buildStatement, err = aqueryHandler.fileWriteActionBuildStatement(actionEntry)
419 } else if actionEntry.isSymlinkTreeAction() {
420 buildStatement, err = aqueryHandler.symlinkTreeActionBuildStatement(actionEntry)
421 } else if len(actionEntry.Arguments) < 1 {
422 err = fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
423 } else {
424 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
425 }
426
427 if err != nil {
428 return nil, nil, err
429 }
430 buildStatements = append(buildStatements, buildStatement)
431 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500432 }
433
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400434 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400435 var depsets []AqueryDepset
Liz Kammer690fbac2023-02-10 11:11:17 -0500436 {
437 eventHandler.Begin("depsets")
438 defer eventHandler.End("depsets")
439 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
440 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
441 // Two depsets collide on hash. Ensure that their contents are identical.
442 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
443 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
444 }
445 } else {
446 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
447 depsets = append(depsets, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400448 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400449 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400450 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400451
Liz Kammer690fbac2023-02-10 11:11:17 -0500452 eventHandler.Do("build_statement_sort", func() {
453 // Build Statements and depsets must be sorted by their content hash to
454 // preserve determinism between builds (this will result in consistent ninja file
455 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
456 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
457 sort.Slice(buildStatements, func(i, j int) bool {
458 // For build statements, compare output lists. In Bazel, each output file
459 // may only have one action which generates it, so this will provide
460 // a deterministic ordering.
461 outputs_i := buildStatements[i].OutputPaths
462 outputs_j := buildStatements[j].OutputPaths
463 if len(outputs_i) != len(outputs_j) {
464 return len(outputs_i) < len(outputs_j)
465 }
466 if len(outputs_i) == 0 {
467 // No outputs for these actions, so compare commands.
468 return buildStatements[i].Command < buildStatements[j].Command
469 }
470 // There may be multiple outputs, but the output ordering is deterministic.
471 return outputs_i[0] < outputs_j[0]
472 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400473 })
Liz Kammer690fbac2023-02-10 11:11:17 -0500474 eventHandler.Do("depset_sort", func() {
475 sort.Slice(depsets, func(i, j int) bool {
476 return depsets[i].ContentHash < depsets[j].ContentHash
477 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400478 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400479 return buildStatements, depsets, nil
480}
481
// depsetContentHash returns the SHA256 checksum of a depset's contents,
// encoded as unpadded URL-safe base64. The checksum can be used as the
// depset's identifier. A content hash is preferable to an integer ID for
// determinism: an integer assigned in creation order would change whenever
// the order in which depsets are created changes.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	// Paths cannot contain a newline, so newline delimits the direct paths.
	// The transitive hashes follow immediately, concatenated without any
	// separator.
	payload := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(payload))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
495
Usta Shrestha6298cc52022-05-27 17:40:21 -0400496func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
497 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400498 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400499 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500500 if _, empty := a.emptyDepsetIds[depsetId]; !empty {
501 return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", depsetId)
502 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400503 } else {
504 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400505 }
506 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400507 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400508}
509
Usta Shresthac2372492022-05-27 10:45:00 -0400510func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400511 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400512 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400513 if err != nil {
514 return BuildStatement{}, err
515 }
Usta Shresthac2372492022-05-27 10:45:00 -0400516 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400517 if err != nil {
518 return BuildStatement{}, err
519 }
520
521 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400522 Command: command,
523 Depfile: depfile,
524 OutputPaths: outputPaths,
525 InputDepsetHashes: inputDepsetHashes,
526 Env: actionEntry.EnvironmentVariables,
527 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400528 }
529 return buildStatement, nil
530}
531
Usta Shresthac2372492022-05-27 10:45:00 -0400532func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
533 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400534 if err != nil {
535 return BuildStatement{}, err
536 }
537 if len(outputPaths) != 1 {
538 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
539 }
540 expandedTemplateContent := expandTemplateContent(actionEntry)
541 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
542 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
543 // change \n to space and mess up the format of Python programs.
544 // sed is used to convert \\n back to \n before saving to output file.
545 // See go/python-binary-host-mixed-build for more details.
546 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
547 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400548 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400549 if err != nil {
550 return BuildStatement{}, err
551 }
552
553 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400554 Command: command,
555 Depfile: depfile,
556 OutputPaths: outputPaths,
557 InputDepsetHashes: inputDepsetHashes,
558 Env: actionEntry.EnvironmentVariables,
559 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400560 }
561 return buildStatement, nil
562}
563
Sasha Smundak1da064c2022-06-08 16:36:16 -0700564func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry action) (BuildStatement, error) {
565 outputPaths, _, err := a.getOutputPaths(actionEntry)
566 var depsetHashes []string
567 if err == nil {
568 depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
569 }
570 if err != nil {
571 return BuildStatement{}, err
572 }
573 return BuildStatement{
574 Depfile: nil,
575 OutputPaths: outputPaths,
576 Env: actionEntry.EnvironmentVariables,
577 Mnemonic: actionEntry.Mnemonic,
578 InputDepsetHashes: depsetHashes,
579 FileContents: actionEntry.FileContents,
580 }, nil
581}
582
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700583func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry action) (BuildStatement, error) {
584 outputPaths, _, err := a.getOutputPaths(actionEntry)
585 if err != nil {
586 return BuildStatement{}, err
587 }
588 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
589 if err != nil {
590 return BuildStatement{}, err
591 }
592 if len(inputPaths) != 1 || len(outputPaths) != 1 {
593 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
594 }
595 // The actual command is generated in bazelSingleton.GenerateBuildActions
596 return BuildStatement{
597 Depfile: nil,
598 OutputPaths: outputPaths,
599 Env: actionEntry.EnvironmentVariables,
600 Mnemonic: actionEntry.Mnemonic,
601 InputPaths: inputPaths,
602 }, nil
603}
604
Usta Shresthac2372492022-05-27 10:45:00 -0400605func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
606 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400607 if err != nil {
608 return BuildStatement{}, err
609 }
610
Usta Shresthac2372492022-05-27 10:45:00 -0400611 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400612 if err != nil {
613 return BuildStatement{}, err
614 }
615 if len(inputPaths) != 1 || len(outputPaths) != 1 {
616 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
617 }
618 out := outputPaths[0]
619 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
620 out = proptools.ShellEscapeIncludingSpaces(out)
621 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
622 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
623 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
624 symlinkPaths := outputPaths[:]
625
626 buildStatement := BuildStatement{
627 Command: command,
628 Depfile: depfile,
629 OutputPaths: outputPaths,
630 InputPaths: inputPaths,
631 Env: actionEntry.EnvironmentVariables,
632 Mnemonic: actionEntry.Mnemonic,
633 SymlinkPaths: symlinkPaths,
634 }
635 return buildStatement, nil
636}
637
Usta Shresthac2372492022-05-27 10:45:00 -0400638func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400639 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400640 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400641 if !exists {
642 err = fmt.Errorf("undefined outputId %d", outputId)
643 return
644 }
645 ext := filepath.Ext(outputPath)
646 if ext == ".d" {
647 if depfile != nil {
648 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
649 return
650 } else {
651 depfile = &outputPath
652 }
653 } else {
654 outputPaths = append(outputPaths, outputPath)
655 }
656 }
657 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500658}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500659
Wei Li455ba832021-11-04 22:58:12 +0000660// expandTemplateContent substitutes the tokens in a template.
661func expandTemplateContent(actionEntry action) string {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700662 var replacerString []string
Wei Li455ba832021-11-04 22:58:12 +0000663 for _, pair := range actionEntry.Substitutions {
664 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400665 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000666 value = val
667 }
668 replacerString = append(replacerString, pair.Key, value)
669 }
670 replacer := strings.NewReplacer(replacerString...)
671 return replacer.Replace(actionEntry.TemplateContent)
672}
673
// commandLineArgumentReplacer escapes characters in a command line argument.
// Each special character is rewritten in a single pass over the input, so
// text inserted by one replacement is never escaped again:
//
//	backslash     -> two backslashes
//	dollar sign   -> backslash, dollar sign
//	backtick      -> backslash, backtick
//	double quote  -> backslash, double quote
//	newline       -> backslash, letter n
//	single quote  -> '"'"'
var commandLineArgumentReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"$", "\\$",
	"`", "\\`",
	"\"", "\\\"",
	"\n", `\n`,
	"'", "'\"'\"'",
)
683
Wei Li455ba832021-11-04 22:58:12 +0000684func escapeCommandlineArgument(str string) string {
Liz Kammerf15a0792023-02-09 14:28:36 -0500685 return commandLineArgumentReplacer.Replace(str)
Wei Li455ba832021-11-04 22:58:12 +0000686}
687
Sasha Smundak1da064c2022-06-08 16:36:16 -0700688func (a action) isSymlinkAction() bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000689 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400690}
691
Sasha Smundak1da064c2022-06-08 16:36:16 -0700692func (a action) isTemplateExpandAction() bool {
Wei Li455ba832021-11-04 22:58:12 +0000693 return a.Mnemonic == "TemplateExpand"
694}
695
Sasha Smundak1da064c2022-06-08 16:36:16 -0700696func (a action) isFileWriteAction() bool {
697 return a.Mnemonic == "FileWrite" || a.Mnemonic == "SourceSymlinkManifest"
698}
699
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700700func (a action) isSymlinkTreeAction() bool {
701 return a.Mnemonic == "SymlinkTree"
702}
703
Chris Parsons8d6e4332021-02-22 16:13:50 -0500704func shouldSkipAction(a action) bool {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400705 // Middleman actions are not handled like other actions; they are handled separately as a
706 // preparatory step so that their inputs may be relayed to actions depending on middleman
707 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500708 if a.Mnemonic == "Middleman" {
709 return true
710 }
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700711 // PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
712 if a.Mnemonic == "PythonZipper" {
713 return true
714 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500715 // Skip "Fail" actions, which are placeholder actions designed to always fail.
716 if a.Mnemonic == "Fail" {
717 return true
718 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700719 if a.Mnemonic == "BaselineCoverage" {
720 return true
721 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500722 return false
723}
724
Usta Shrestha6298cc52022-05-27 17:40:21 -0400725func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
726 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500727 currId := id
728 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
729 for currId > 0 {
730 currFragment, ok := pathFragmentsMap[currId]
731 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500732 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500733 }
734 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400735 if currId == currFragment.ParentId {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700736 return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
Liz Kammerc49e6822021-06-08 15:04:11 -0400737 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500738 currId = currFragment.ParentId
739 }
740 return filepath.Join(labels...), nil
741}