blob: 80cf70a431d6e8333dcbb41ddbe5910d513107ae [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050024 "strings"
25
26 "github.com/google/blueprint/proptools"
Jason Wu118fd2b2022-10-27 18:41:15 +000027 "google.golang.org/protobuf/proto"
28 analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050029)
30
// Distinct integer types for the three id spaces used in an aquery response,
// so that an id of one kind cannot be passed where another kind is expected
// without an explicit conversion.
type (
	// artifactId identifies an artifact entry.
	artifactId int
	// depsetId identifies a depSetOfFiles entry.
	depsetId int
	// pathFragmentId identifies a pathFragment entry.
	pathFragmentId int
)
34
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050035// artifact contains relevant portions of Bazel's aquery proto, Artifact.
36// Represents a single artifact, whether it's a source file or a derived output file.
37type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040038 Id artifactId
39 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050040}
41
42type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040043 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050044 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040045 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050046}
47
// KeyValuePair mirrors the KeyValuePair message of Bazel's aquery proto: a
// plain string key together with its string value. The field order (Key, then
// Value) is relied upon by positional literals in this package.
type KeyValuePair struct {
	Key, Value string
}
53
// AqueryDepset is the post-processed form of a depset taken from Bazel's
// aquery response. It corresponds to `depSetOfFiles` in the response proto,
// with two differences:
//   - direct artifacts are listed by full path rather than by id
//   - the depset is identified by a hash of its contents rather than by an
//     integer id, which keeps identifiers deterministic
//
// A depset is a structure for handling transitive sets of artifact paths
// efficiently: it holds artifact paths of its own plus references to "child"
// depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset by its contents.
	ContentHash string
	// DirectArtifacts holds the full paths of the depset's own artifacts.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the ContentHash of each child depset.
	TransitiveDepSetHashes []string
}
67
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050068// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
69// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
70// data structure for storing large numbers of file paths.
71type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040072 Id depsetId
73 DirectArtifactIds []artifactId
74 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050075}
76
77// action contains relevant portions of Bazel's aquery proto, Action.
78// Represents a single command line invocation in the Bazel build graph.
79type action struct {
80 Arguments []string
81 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040082 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050083 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040084 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000085 TemplateContent string
86 Substitutions []KeyValuePair
Sasha Smundak1da064c2022-06-08 16:36:16 -070087 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050088}
89
90// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
91// An aquery response from Bazel contains a single ActionGraphContainer proto.
92type actionGraphContainer struct {
93 Artifacts []artifact
94 Actions []action
95 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050096 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050097}
98
99// BuildStatement contains information to register a build statement corresponding (one to one)
100// with a Bazel action from Bazel's action graph.
101type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -0400102 Command string
103 Depfile *string
104 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400105 SymlinkPaths []string
106 Env []KeyValuePair
107 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400108
109 // Inputs of this build statement, either as unexpanded depsets or expanded
110 // input paths. There should be no overlap between these fields; an input
111 // path should either be included as part of an unexpanded depset or a raw
112 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400113 InputDepsetHashes []string
114 InputPaths []string
Sasha Smundak1da064c2022-06-08 16:36:16 -0700115 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500116}
117
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400118// A helper type for aquery processing which facilitates retrieval of path IDs from their
119// less readable Bazel structures (depset and path fragment).
120type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400121 // Maps depset id to AqueryDepset, a representation of depset which is
122 // post-processed for middleman artifact handling, unhandled artifact
123 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400124 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500125 emptyDepsetIds map[depsetId]struct{}
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400126 // Maps content hash to AqueryDepset.
127 depsetHashToAqueryDepset map[string]AqueryDepset
128
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400129 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
130 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400131 depsetHashToArtifactPathsCache map[string][]string
Usta Shrestha6298cc52022-05-27 17:40:21 -0400132 // Maps artifact ids to fully expanded paths.
133 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400134}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500135
// templateActionOverriddenTokens lists template tokens whose replacement is
// fixed here, taking precedence over the value found in the 'substitutions'
// of a TemplateExpand action.
var templateActionOverriddenTokens = map[string]string{
	// aquery reports "py3wrapper.sh" for %python_binary%; "python3" is used
	// in its place.
	"%python_binary%": "python3",
}
143
// py3wrapperFileName is the path suffix (leading slash included) that
// identifies py3wrapper.sh, the wrapper script used by py_binary targets.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000146
// indexBy builds a lookup table over values, using keyFn to derive the key of
// each element. When several elements yield the same key, the one that appears
// last in values wins. The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V)
	for i := range values {
		element := values[i]
		index[keyFn(element)] = element
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400154
Usta Shrestha6298cc52022-05-27 17:40:21 -0400155func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
156 pathFragments := indexBy(aqueryResult.PathFragments, func(pf pathFragment) pathFragmentId {
157 return pf.Id
158 })
159
160 artifactIdToPath := map[artifactId]string{}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500161 for _, artifact := range aqueryResult.Artifacts {
162 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
163 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500164 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500165 }
166 artifactIdToPath[artifact.Id] = artifactPath
167 }
Chris Parsons943f2432021-01-19 11:36:50 -0500168
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400169 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400170 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400171 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400172 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
173 // that action instead.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400174 middlemanIdToDepsetIds := map[artifactId][]depsetId{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500175 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500176 if actionEntry.Mnemonic == "Middleman" {
177 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400178 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500179 }
180 }
181 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400182
Usta Shrestha6298cc52022-05-27 17:40:21 -0400183 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d depSetOfFiles) depsetId {
184 return d.Id
185 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400186
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400187 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400188 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400189 depsetHashToAqueryDepset: map[string]AqueryDepset{},
190 depsetHashToArtifactPathsCache: map[string][]string{},
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500191 emptyDepsetIds: make(map[depsetId]struct{}, 0),
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400192 artifactIdToPath: artifactIdToPath,
193 }
194
195 // Validate and adjust aqueryResult.DepSetOfFiles values.
196 for _, depset := range aqueryResult.DepSetOfFiles {
197 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
198 if err != nil {
199 return nil, err
200 }
201 }
202
203 return &aqueryHandler, nil
204}
205
206// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
207// depset.
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500208func (a *aqueryArtifactHandler) populateDepsetMaps(depset depSetOfFiles, middlemanIdToDepsetIds map[artifactId][]depsetId, depsetIdToDepset map[depsetId]depSetOfFiles) (*AqueryDepset, error) {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400209 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depset.Id]; containsDepset {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500210 return &aqueryDepset, nil
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400211 }
212 transitiveDepsetIds := depset.TransitiveDepSetIds
Usta Shrestha6298cc52022-05-27 17:40:21 -0400213 var directArtifactPaths []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400214 for _, artifactId := range depset.DirectArtifactIds {
215 path, pathExists := a.artifactIdToPath[artifactId]
216 if !pathExists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500217 return nil, fmt.Errorf("undefined input artifactId %d", artifactId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400218 }
219 // Filter out any inputs which are universally dropped, and swap middleman
220 // artifacts with their corresponding depsets.
221 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[artifactId]; isMiddleman {
222 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
223 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400224 } else if strings.HasSuffix(path, py3wrapperFileName) ||
Usta Shresthaef922252022-06-02 14:23:02 -0400225 strings.HasPrefix(path, "../bazel_tools") {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500226 continue
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400227 // Drop these artifacts.
228 // See go/python-binary-host-mixed-build for more details.
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700229 // 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
230 // TemplateExpandAction handles everything necessary to launch a Pythin application.
231 // 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
Usta Shresthaef922252022-06-02 14:23:02 -0400232 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400233 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400234 directArtifactPaths = append(directArtifactPaths, path)
235 }
236 }
237
Usta Shrestha6298cc52022-05-27 17:40:21 -0400238 var childDepsetHashes []string
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400239 for _, childDepsetId := range transitiveDepsetIds {
240 childDepset, exists := depsetIdToDepset[childDepsetId]
241 if !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500242 if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
243 continue
244 } else {
245 return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
246 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400247 }
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500248 if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
249 return nil, err
250 } else if childAqueryDepset == nil {
251 continue
252 } else {
253 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400254 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400255 }
Usta Shresthaef922252022-06-02 14:23:02 -0400256 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500257 a.emptyDepsetIds[depset.Id] = struct{}{}
258 return nil, nil
Usta Shresthaef922252022-06-02 14:23:02 -0400259 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400260 aqueryDepset := AqueryDepset{
261 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
262 DirectArtifacts: directArtifactPaths,
263 TransitiveDepSetHashes: childDepsetHashes,
264 }
265 a.depsetIdToAqueryDepset[depset.Id] = aqueryDepset
266 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500267 return &aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400268}
269
Chris Parsons1a7aca02022-04-25 22:35:15 -0400270// getInputPaths flattens the depsets of the given IDs and returns all transitive
271// input paths contained in these depsets.
272// This is a potentially expensive operation, and should not be invoked except
273// for actions which need specialized input handling.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400274func (a *aqueryArtifactHandler) getInputPaths(depsetIds []depsetId) ([]string, error) {
275 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400276
277 for _, inputDepSetId := range depsetIds {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400278 depset := a.depsetIdToAqueryDepset[inputDepSetId]
279 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400280 if err != nil {
281 return nil, err
282 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400283 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400284 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400285 }
286 }
Wei Li455ba832021-11-04 22:58:12 +0000287
Chris Parsons1a7aca02022-04-25 22:35:15 -0400288 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400289}
290
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400291func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
292 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400293 return result, nil
294 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400295 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
296 result := depset.DirectArtifacts
297 for _, childHash := range depset.TransitiveDepSetHashes {
298 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400299 if err != nil {
300 return nil, err
301 }
302 result = append(result, childArtifactIds...)
303 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400304 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400305 return result, nil
306 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400307 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400308 }
309}
310
Chris Parsons1a7aca02022-04-25 22:35:15 -0400311// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400312// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400313// action graph, as described by the given action graph json proto.
314// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
315// are one-to-one with Bazel's depSetOfFiles objects.
316func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, []AqueryDepset, error) {
Jason Wu118fd2b2022-10-27 18:41:15 +0000317 aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
318 err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400319 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400320 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400321 }
Jason Wu118fd2b2022-10-27 18:41:15 +0000322 aqueryResult := actionGraphContainer{}
323
324 for _, protoArtifact := range aqueryProto.Artifacts {
325 aqueryResult.Artifacts = append(aqueryResult.Artifacts, artifact{artifactId(protoArtifact.Id),
326 pathFragmentId(protoArtifact.PathFragmentId)})
327 }
328
329 for _, protoAction := range aqueryProto.Actions {
330 var environmentVariable []KeyValuePair
331 var inputDepSetIds []depsetId
332 var outputIds []artifactId
333 var substitutions []KeyValuePair
334
335 for _, protoEnvironmentVariable := range protoAction.EnvironmentVariables {
336 environmentVariable = append(environmentVariable, KeyValuePair{
337 protoEnvironmentVariable.Key, protoEnvironmentVariable.Value,
338 })
339 }
340 for _, protoInputDepSetIds := range protoAction.InputDepSetIds {
341 inputDepSetIds = append(inputDepSetIds, depsetId(protoInputDepSetIds))
342 }
343 for _, protoOutputIds := range protoAction.OutputIds {
344 outputIds = append(outputIds, artifactId(protoOutputIds))
345 }
346 for _, protoSubstitutions := range protoAction.Substitutions {
347 substitutions = append(substitutions, KeyValuePair{
348 protoSubstitutions.Key, protoSubstitutions.Value,
349 })
350 }
351
352 aqueryResult.Actions = append(aqueryResult.Actions,
353 action{
354 Arguments: protoAction.Arguments,
355 EnvironmentVariables: environmentVariable,
356 InputDepSetIds: inputDepSetIds,
357 Mnemonic: protoAction.Mnemonic,
358 OutputIds: outputIds,
359 TemplateContent: protoAction.TemplateContent,
360 Substitutions: substitutions,
361 FileContents: protoAction.FileContents})
362 }
363
364 for _, protoDepSetOfFiles := range aqueryProto.DepSetOfFiles {
365 var directArtifactIds []artifactId
366 var transitiveDepSetIds []depsetId
367
368 for _, protoDirectArtifactIds := range protoDepSetOfFiles.DirectArtifactIds {
369 directArtifactIds = append(directArtifactIds, artifactId(protoDirectArtifactIds))
370 }
371 for _, protoTransitiveDepSetIds := range protoDepSetOfFiles.TransitiveDepSetIds {
372 transitiveDepSetIds = append(transitiveDepSetIds, depsetId(protoTransitiveDepSetIds))
373 }
374 aqueryResult.DepSetOfFiles = append(aqueryResult.DepSetOfFiles,
375 depSetOfFiles{
376 Id: depsetId(protoDepSetOfFiles.Id),
377 DirectArtifactIds: directArtifactIds,
378 TransitiveDepSetIds: transitiveDepSetIds})
379
380 }
381
382 for _, protoPathFragments := range aqueryProto.PathFragments {
383 aqueryResult.PathFragments = append(aqueryResult.PathFragments,
384 pathFragment{
385 Id: pathFragmentId(protoPathFragments.Id),
386 Label: protoPathFragments.Label,
387 ParentId: pathFragmentId(protoPathFragments.ParentId)})
388
389 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400390 aqueryHandler, err := newAqueryHandler(aqueryResult)
391 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400392 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400393 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500394
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400395 var buildStatements []BuildStatement
Chris Parsons8d6e4332021-02-22 16:13:50 -0500396 for _, actionEntry := range aqueryResult.Actions {
397 if shouldSkipAction(actionEntry) {
398 continue
399 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400400
Chris Parsons1a7aca02022-04-25 22:35:15 -0400401 var buildStatement BuildStatement
Sasha Smundak1da064c2022-06-08 16:36:16 -0700402 if actionEntry.isSymlinkAction() {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400403 buildStatement, err = aqueryHandler.symlinkActionBuildStatement(actionEntry)
Sasha Smundak1da064c2022-06-08 16:36:16 -0700404 } else if actionEntry.isTemplateExpandAction() && len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400405 buildStatement, err = aqueryHandler.templateExpandActionBuildStatement(actionEntry)
Sasha Smundak1da064c2022-06-08 16:36:16 -0700406 } else if actionEntry.isFileWriteAction() {
407 buildStatement, err = aqueryHandler.fileWriteActionBuildStatement(actionEntry)
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700408 } else if actionEntry.isSymlinkTreeAction() {
409 buildStatement, err = aqueryHandler.symlinkTreeActionBuildStatement(actionEntry)
Liz Kammerc49e6822021-06-08 15:04:11 -0400410 } else if len(actionEntry.Arguments) < 1 {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400411 return nil, nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
412 } else {
413 buildStatement, err = aqueryHandler.normalActionBuildStatement(actionEntry)
414 }
415
416 if err != nil {
417 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500418 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500419 buildStatements = append(buildStatements, buildStatement)
420 }
421
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400422 depsetsByHash := map[string]AqueryDepset{}
Usta Shrestha6298cc52022-05-27 17:40:21 -0400423 var depsets []AqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400424 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
425 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
426 // Two depsets collide on hash. Ensure that their contents are identical.
427 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
Usta Shrestha16ac1352022-06-22 11:01:55 -0400428 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400429 }
430 } else {
431 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
432 depsets = append(depsets, aqueryDepset)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400433 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400434 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400435
436 // Build Statements and depsets must be sorted by their content hash to
437 // preserve determinism between builds (this will result in consistent ninja file
438 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
439 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
440 sort.Slice(buildStatements, func(i, j int) bool {
441 // For build statements, compare output lists. In Bazel, each output file
442 // may only have one action which generates it, so this will provide
443 // a deterministic ordering.
444 outputs_i := buildStatements[i].OutputPaths
445 outputs_j := buildStatements[j].OutputPaths
446 if len(outputs_i) != len(outputs_j) {
447 return len(outputs_i) < len(outputs_j)
448 }
449 if len(outputs_i) == 0 {
450 // No outputs for these actions, so compare commands.
451 return buildStatements[i].Command < buildStatements[j].Command
452 }
453 // There may be multiple outputs, but the output ordering is deterministic.
454 return outputs_i[0] < outputs_j[0]
455 })
456 sort.Slice(depsets, func(i, j int) bool {
457 return depsets[i].ContentHash < depsets[j].ContentHash
458 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400459 return buildStatements, depsets, nil
460}
461
// depsetContentHash returns a checksum of the contents of a depset: the
// SHA256 digest of the input, encoded as unpadded URL-safe base64. The hash
// may serve as the depset's identifier. A content hash is preferable to, say,
// an integer id assigned in creation order, because it does not depend on the
// order in which depsets are created and is therefore deterministic.
//
// The hashed input is the direct paths joined by newlines (paths cannot
// contain a newline), immediately followed by the transitive depset hashes
// concatenated with no separator.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	payload := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(payload))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
475
Usta Shrestha6298cc52022-05-27 17:40:21 -0400476func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []depsetId) ([]string, error) {
477 var hashes []string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400478 for _, depsetId := range inputDepsetIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400479 if aqueryDepset, exists := a.depsetIdToAqueryDepset[depsetId]; !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500480 if _, empty := a.emptyDepsetIds[depsetId]; !empty {
481 return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", depsetId)
482 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400483 } else {
484 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400485 }
486 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400487 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400488}
489
Usta Shresthac2372492022-05-27 10:45:00 -0400490func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry action) (BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400491 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400492 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400493 if err != nil {
494 return BuildStatement{}, err
495 }
Usta Shresthac2372492022-05-27 10:45:00 -0400496 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400497 if err != nil {
498 return BuildStatement{}, err
499 }
500
501 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400502 Command: command,
503 Depfile: depfile,
504 OutputPaths: outputPaths,
505 InputDepsetHashes: inputDepsetHashes,
506 Env: actionEntry.EnvironmentVariables,
507 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400508 }
509 return buildStatement, nil
510}
511
Usta Shresthac2372492022-05-27 10:45:00 -0400512func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry action) (BuildStatement, error) {
513 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400514 if err != nil {
515 return BuildStatement{}, err
516 }
517 if len(outputPaths) != 1 {
518 return BuildStatement{}, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
519 }
520 expandedTemplateContent := expandTemplateContent(actionEntry)
521 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
522 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
523 // change \n to space and mess up the format of Python programs.
524 // sed is used to convert \\n back to \n before saving to output file.
525 // See go/python-binary-host-mixed-build for more details.
526 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
527 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400528 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400529 if err != nil {
530 return BuildStatement{}, err
531 }
532
533 buildStatement := BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400534 Command: command,
535 Depfile: depfile,
536 OutputPaths: outputPaths,
537 InputDepsetHashes: inputDepsetHashes,
538 Env: actionEntry.EnvironmentVariables,
539 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400540 }
541 return buildStatement, nil
542}
543
Sasha Smundak1da064c2022-06-08 16:36:16 -0700544func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry action) (BuildStatement, error) {
545 outputPaths, _, err := a.getOutputPaths(actionEntry)
546 var depsetHashes []string
547 if err == nil {
548 depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
549 }
550 if err != nil {
551 return BuildStatement{}, err
552 }
553 return BuildStatement{
554 Depfile: nil,
555 OutputPaths: outputPaths,
556 Env: actionEntry.EnvironmentVariables,
557 Mnemonic: actionEntry.Mnemonic,
558 InputDepsetHashes: depsetHashes,
559 FileContents: actionEntry.FileContents,
560 }, nil
561}
562
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700563func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry action) (BuildStatement, error) {
564 outputPaths, _, err := a.getOutputPaths(actionEntry)
565 if err != nil {
566 return BuildStatement{}, err
567 }
568 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
569 if err != nil {
570 return BuildStatement{}, err
571 }
572 if len(inputPaths) != 1 || len(outputPaths) != 1 {
573 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
574 }
575 // The actual command is generated in bazelSingleton.GenerateBuildActions
576 return BuildStatement{
577 Depfile: nil,
578 OutputPaths: outputPaths,
579 Env: actionEntry.EnvironmentVariables,
580 Mnemonic: actionEntry.Mnemonic,
581 InputPaths: inputPaths,
582 }, nil
583}
584
Usta Shresthac2372492022-05-27 10:45:00 -0400585func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry action) (BuildStatement, error) {
586 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400587 if err != nil {
588 return BuildStatement{}, err
589 }
590
Usta Shresthac2372492022-05-27 10:45:00 -0400591 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400592 if err != nil {
593 return BuildStatement{}, err
594 }
595 if len(inputPaths) != 1 || len(outputPaths) != 1 {
596 return BuildStatement{}, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
597 }
598 out := outputPaths[0]
599 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
600 out = proptools.ShellEscapeIncludingSpaces(out)
601 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
602 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
603 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
604 symlinkPaths := outputPaths[:]
605
606 buildStatement := BuildStatement{
607 Command: command,
608 Depfile: depfile,
609 OutputPaths: outputPaths,
610 InputPaths: inputPaths,
611 Env: actionEntry.EnvironmentVariables,
612 Mnemonic: actionEntry.Mnemonic,
613 SymlinkPaths: symlinkPaths,
614 }
615 return buildStatement, nil
616}
617
Usta Shresthac2372492022-05-27 10:45:00 -0400618func (a *aqueryArtifactHandler) getOutputPaths(actionEntry action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400619 for _, outputId := range actionEntry.OutputIds {
Usta Shresthac2372492022-05-27 10:45:00 -0400620 outputPath, exists := a.artifactIdToPath[outputId]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400621 if !exists {
622 err = fmt.Errorf("undefined outputId %d", outputId)
623 return
624 }
625 ext := filepath.Ext(outputPath)
626 if ext == ".d" {
627 if depfile != nil {
628 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
629 return
630 } else {
631 depfile = &outputPath
632 }
633 } else {
634 outputPaths = append(outputPaths, outputPath)
635 }
636 }
637 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500638}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500639
Wei Li455ba832021-11-04 22:58:12 +0000640// expandTemplateContent substitutes the tokens in a template.
641func expandTemplateContent(actionEntry action) string {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700642 var replacerString []string
Wei Li455ba832021-11-04 22:58:12 +0000643 for _, pair := range actionEntry.Substitutions {
644 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400645 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000646 value = val
647 }
648 replacerString = append(replacerString, pair.Key, value)
649 }
650 replacer := strings.NewReplacer(replacerString...)
651 return replacer.Replace(actionEntry.TemplateContent)
652}
653
// commandlineArgumentEscaper performs the substitutions used by
// escapeCommandlineArgument:
//
//	\->\\, $->\$, `->\`, "->\", \n->\\n, '->'"'"'
//
// The table is constant, so the replacer is built once and shared; a
// *strings.Replacer is safe for concurrent use by multiple goroutines.
var commandlineArgumentEscaper = strings.NewReplacer(
	`\`, `\\`,
	`$`, `\$`,
	"`", "\\`",
	`"`, `\"`,
	"\n", "\\n",
	`'`, `'"'"'`,
)

// escapeCommandlineArgument returns str with backslashes, dollar signs,
// backticks, double quotes, newlines and single quotes escaped according to
// commandlineArgumentEscaper. Each character of the input is replaced at most
// once, so text inserted by one substitution is never escaped again.
func escapeCommandlineArgument(str string) string {
	return commandlineArgumentEscaper.Replace(str)
}
666
Sasha Smundak1da064c2022-06-08 16:36:16 -0700667func (a action) isSymlinkAction() bool {
Trevor Radcliffeef9c9002022-05-13 20:55:35 +0000668 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" || a.Mnemonic == "ExecutableSymlink"
Liz Kammerc49e6822021-06-08 15:04:11 -0400669}
670
Sasha Smundak1da064c2022-06-08 16:36:16 -0700671func (a action) isTemplateExpandAction() bool {
Wei Li455ba832021-11-04 22:58:12 +0000672 return a.Mnemonic == "TemplateExpand"
673}
674
Sasha Smundak1da064c2022-06-08 16:36:16 -0700675func (a action) isFileWriteAction() bool {
676 return a.Mnemonic == "FileWrite" || a.Mnemonic == "SourceSymlinkManifest"
677}
678
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700679func (a action) isSymlinkTreeAction() bool {
680 return a.Mnemonic == "SymlinkTree"
681}
682
Chris Parsons8d6e4332021-02-22 16:13:50 -0500683func shouldSkipAction(a action) bool {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400684 // Middleman actions are not handled like other actions; they are handled separately as a
685 // preparatory step so that their inputs may be relayed to actions depending on middleman
686 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500687 if a.Mnemonic == "Middleman" {
688 return true
689 }
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700690 // PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
691 if a.Mnemonic == "PythonZipper" {
692 return true
693 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500694 // Skip "Fail" actions, which are placeholder actions designed to always fail.
695 if a.Mnemonic == "Fail" {
696 return true
697 }
Yu Liu8d82ac52022-05-17 15:13:28 -0700698 if a.Mnemonic == "BaselineCoverage" {
699 return true
700 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500701 return false
702}
703
Usta Shrestha6298cc52022-05-27 17:40:21 -0400704func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]pathFragment) (string, error) {
705 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500706 currId := id
707 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
708 for currId > 0 {
709 currFragment, ok := pathFragmentsMap[currId]
710 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500711 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500712 }
713 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400714 if currId == currFragment.ParentId {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700715 return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
Liz Kammerc49e6822021-06-08 15:04:11 -0400716 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500717 currId = currFragment.ParentId
718 }
719 return filepath.Join(labels...), nil
720}