blob: d18665e17a487c8e92fffc8f09a48fd57695cef6 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050024 "strings"
25
Liz Kammer690fbac2023-02-10 11:11:17 -050026 analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
27
28 "github.com/google/blueprint/metrics"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050029 "github.com/google/blueprint/proptools"
Jason Wu118fd2b2022-10-27 18:41:15 +000030 "google.golang.org/protobuf/proto"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050031)
32
// Distinct integer types for the numeric identifiers found in an aquery
// response, so that one kind of ID cannot be passed where another is expected.
type (
	// artifactId identifies a single artifact.
	artifactId int
	// depsetId identifies a single depset of files.
	depsetId int
	// pathFragmentId identifies a single path fragment.
	pathFragmentId int
)
36
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050037// artifact contains relevant portions of Bazel's aquery proto, Artifact.
38// Represents a single artifact, whether it's a source file or a derived output file.
39type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040040 Id artifactId
41 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050042}
43
44type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040045 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050046 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040047 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050048}
49
// KeyValuePair mirrors the KeyValuePair message of Bazel's aquery proto: a
// plain string key with its associated string value.
type KeyValuePair struct {
	// Key is the name half of the pair.
	Key string
	// Value is the content half of the pair.
	Value string
}
55
// AqueryDepset is the post-processed form of a depset taken from Bazel's
// aquery response. It corresponds to `depSetOfFiles` in the response proto,
// with two differences:
//   - direct artifacts are listed by their full path rather than by ID
//   - the depset is identified by a hash of its contents rather than by an
//     integer ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths: it holds artifact paths of its own plus references to "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies this depset by a hash of its contents.
	ContentHash string
	// DirectArtifacts holds the paths of the artifacts directly in this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes holds the content hashes of the child depsets.
	TransitiveDepSetHashes []string
}
69
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050070// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
71// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
72// data structure for storing large numbers of file paths.
73type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040074 Id depsetId
75 DirectArtifactIds []artifactId
76 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050077}
78
79// action contains relevant portions of Bazel's aquery proto, Action.
80// Represents a single command line invocation in the Bazel build graph.
81type action struct {
82 Arguments []string
83 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040084 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050085 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040086 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000087 TemplateContent string
88 Substitutions []KeyValuePair
Sasha Smundak1da064c2022-06-08 16:36:16 -070089 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050090}
91
92// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
93// An aquery response from Bazel contains a single ActionGraphContainer proto.
94type actionGraphContainer struct {
95 Artifacts []artifact
96 Actions []action
97 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050098 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050099}
100
101// BuildStatement contains information to register a build statement corresponding (one to one)
102// with a Bazel action from Bazel's action graph.
103type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -0400104 Command string
105 Depfile *string
106 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400107 SymlinkPaths []string
Liz Kammer00629db2023-02-09 14:28:15 -0500108 Env []*analysis_v2_proto.KeyValuePair
Liz Kammerc49e6822021-06-08 15:04:11 -0400109 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400110
111 // Inputs of this build statement, either as unexpanded depsets or expanded
112 // input paths. There should be no overlap between these fields; an input
113 // path should either be included as part of an unexpanded depset or a raw
114 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400115 InputDepsetHashes []string
116 InputPaths []string
Sasha Smundak1da064c2022-06-08 16:36:16 -0700117 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500118}
119
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400120// A helper type for aquery processing which facilitates retrieval of path IDs from their
121// less readable Bazel structures (depset and path fragment).
122type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400123 // Maps depset id to AqueryDepset, a representation of depset which is
124 // post-processed for middleman artifact handling, unhandled artifact
125 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400126 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500127 emptyDepsetIds map[depsetId]struct{}
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400128 // Maps content hash to AqueryDepset.
129 depsetHashToAqueryDepset map[string]AqueryDepset
130
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400131 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
132 // may be an expensive operation.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400133 depsetHashToArtifactPathsCache map[string][]string
Usta Shrestha6298cc52022-05-27 17:40:21 -0400134 // Maps artifact ids to fully expanded paths.
135 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400136}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500137
// templateActionOverriddenTokens lists template tokens whose replacement is
// fixed here, taking precedence over whatever the 'substitutions' of a
// TemplateExpand action report for them.
var templateActionOverriddenTokens = map[string]string{
	// aquery reports "py3wrapper.sh" for %python_binary%; "python3" is used
	// in its place. See removePy3wrapperScript.
	"%python_binary%": "python3",
}
145
// middlemanMnemonic is the mnemonic carried by middleman actions.
const middlemanMnemonic = "Middleman"

// py3wrapperFileName is the file name of py3wrapper.sh, which is used by
// py_binary targets. The leading slash makes it usable as a path suffix.
const py3wrapperFileName = "/py3wrapper.sh"
Wei Li455ba832021-11-04 22:58:12 +0000151
// indexBy builds a lookup table from values, using keyFn to derive the key of
// each element. When several elements yield the same key, the one appearing
// last in values is the one kept.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for i := range values {
		index[keyFn(values[i])] = values[i]
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400159
Liz Kammer00629db2023-02-09 14:28:15 -0500160func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aqueryArtifactHandler, error) {
161 pathFragments := indexBy(aqueryResult.PathFragments, func(pf *analysis_v2_proto.PathFragment) pathFragmentId {
162 return pathFragmentId(pf.Id)
Usta Shrestha6298cc52022-05-27 17:40:21 -0400163 })
164
Liz Kammer00629db2023-02-09 14:28:15 -0500165 artifactIdToPath := make(map[artifactId]string, len(aqueryResult.Artifacts))
Chris Parsonsaffbb602020-12-23 12:02:11 -0500166 for _, artifact := range aqueryResult.Artifacts {
Liz Kammer00629db2023-02-09 14:28:15 -0500167 artifactPath, err := expandPathFragment(pathFragmentId(artifact.PathFragmentId), pathFragments)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500168 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500169 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500170 }
Liz Kammer00629db2023-02-09 14:28:15 -0500171 artifactIdToPath[artifactId(artifact.Id)] = artifactPath
Chris Parsonsaffbb602020-12-23 12:02:11 -0500172 }
Chris Parsons943f2432021-01-19 11:36:50 -0500173
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400174 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400175 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400176 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400177 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
178 // that action instead.
Liz Kammer00629db2023-02-09 14:28:15 -0500179 middlemanIdToDepsetIds := map[artifactId][]uint32{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500180 for _, actionEntry := range aqueryResult.Actions {
Liz Kammer00629db2023-02-09 14:28:15 -0500181 if actionEntry.Mnemonic == middlemanMnemonic {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500182 for _, outputId := range actionEntry.OutputIds {
Liz Kammer00629db2023-02-09 14:28:15 -0500183 middlemanIdToDepsetIds[artifactId(outputId)] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500184 }
185 }
186 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400187
Liz Kammer00629db2023-02-09 14:28:15 -0500188 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d *analysis_v2_proto.DepSetOfFiles) depsetId {
189 return depsetId(d.Id)
Usta Shrestha6298cc52022-05-27 17:40:21 -0400190 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400191
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400192 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400193 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400194 depsetHashToAqueryDepset: map[string]AqueryDepset{},
195 depsetHashToArtifactPathsCache: map[string][]string{},
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500196 emptyDepsetIds: make(map[depsetId]struct{}, 0),
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400197 artifactIdToPath: artifactIdToPath,
198 }
199
200 // Validate and adjust aqueryResult.DepSetOfFiles values.
201 for _, depset := range aqueryResult.DepSetOfFiles {
202 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
203 if err != nil {
204 return nil, err
205 }
206 }
207
208 return &aqueryHandler, nil
209}
210
211// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
212// depset.
Liz Kammer00629db2023-02-09 14:28:15 -0500213func (a *aqueryArtifactHandler) populateDepsetMaps(depset *analysis_v2_proto.DepSetOfFiles, middlemanIdToDepsetIds map[artifactId][]uint32, depsetIdToDepset map[depsetId]*analysis_v2_proto.DepSetOfFiles) (*AqueryDepset, error) {
214 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depsetId(depset.Id)]; containsDepset {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500215 return &aqueryDepset, nil
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400216 }
217 transitiveDepsetIds := depset.TransitiveDepSetIds
Liz Kammer00629db2023-02-09 14:28:15 -0500218 directArtifactPaths := make([]string, 0, len(depset.DirectArtifactIds))
219 for _, id := range depset.DirectArtifactIds {
220 aId := artifactId(id)
221 path, pathExists := a.artifactIdToPath[aId]
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400222 if !pathExists {
Liz Kammer00629db2023-02-09 14:28:15 -0500223 return nil, fmt.Errorf("undefined input artifactId %d", aId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400224 }
225 // Filter out any inputs which are universally dropped, and swap middleman
226 // artifacts with their corresponding depsets.
Liz Kammer00629db2023-02-09 14:28:15 -0500227 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[aId]; isMiddleman {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400228 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
229 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400230 } else if strings.HasSuffix(path, py3wrapperFileName) ||
Usta Shresthaef922252022-06-02 14:23:02 -0400231 strings.HasPrefix(path, "../bazel_tools") {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500232 continue
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400233 // Drop these artifacts.
234 // See go/python-binary-host-mixed-build for more details.
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700235 // 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
236 // TemplateExpandAction handles everything necessary to launch a Pythin application.
237 // 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
Usta Shresthaef922252022-06-02 14:23:02 -0400238 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400239 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400240 directArtifactPaths = append(directArtifactPaths, path)
241 }
242 }
243
Liz Kammer00629db2023-02-09 14:28:15 -0500244 childDepsetHashes := make([]string, 0, len(transitiveDepsetIds))
245 for _, id := range transitiveDepsetIds {
246 childDepsetId := depsetId(id)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400247 childDepset, exists := depsetIdToDepset[childDepsetId]
248 if !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500249 if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
250 continue
251 } else {
252 return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
253 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400254 }
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500255 if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
256 return nil, err
257 } else if childAqueryDepset == nil {
258 continue
259 } else {
260 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400261 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400262 }
Usta Shresthaef922252022-06-02 14:23:02 -0400263 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
Liz Kammer00629db2023-02-09 14:28:15 -0500264 a.emptyDepsetIds[depsetId(depset.Id)] = struct{}{}
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500265 return nil, nil
Usta Shresthaef922252022-06-02 14:23:02 -0400266 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400267 aqueryDepset := AqueryDepset{
268 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
269 DirectArtifacts: directArtifactPaths,
270 TransitiveDepSetHashes: childDepsetHashes,
271 }
Liz Kammer00629db2023-02-09 14:28:15 -0500272 a.depsetIdToAqueryDepset[depsetId(depset.Id)] = aqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400273 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500274 return &aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400275}
276
Chris Parsons1a7aca02022-04-25 22:35:15 -0400277// getInputPaths flattens the depsets of the given IDs and returns all transitive
278// input paths contained in these depsets.
279// This is a potentially expensive operation, and should not be invoked except
280// for actions which need specialized input handling.
Liz Kammer00629db2023-02-09 14:28:15 -0500281func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400282 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400283
Liz Kammer00629db2023-02-09 14:28:15 -0500284 for _, id := range depsetIds {
285 inputDepSetId := depsetId(id)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400286 depset := a.depsetIdToAqueryDepset[inputDepSetId]
287 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400288 if err != nil {
289 return nil, err
290 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400291 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400292 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400293 }
294 }
Wei Li455ba832021-11-04 22:58:12 +0000295
Chris Parsons1a7aca02022-04-25 22:35:15 -0400296 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400297}
298
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400299func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
300 if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400301 return result, nil
302 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400303 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
304 result := depset.DirectArtifacts
305 for _, childHash := range depset.TransitiveDepSetHashes {
306 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400307 if err != nil {
308 return nil, err
309 }
310 result = append(result, childArtifactIds...)
311 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400312 a.depsetHashToArtifactPathsCache[depsetHash] = result
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400313 return result, nil
314 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400315 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400316 }
317}
318
Chris Parsons1a7aca02022-04-25 22:35:15 -0400319// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400320// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400321// action graph, as described by the given action graph json proto.
322// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
323// are one-to-one with Bazel's depSetOfFiles objects.
Liz Kammer690fbac2023-02-10 11:11:17 -0500324func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]BuildStatement, []AqueryDepset, error) {
Jason Wu118fd2b2022-10-27 18:41:15 +0000325 aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
326 err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400327 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400328 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400329 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500330
Liz Kammer690fbac2023-02-10 11:11:17 -0500331 var aqueryHandler *aqueryArtifactHandler
332 {
333 eventHandler.Begin("init_handler")
334 defer eventHandler.End("init_handler")
Liz Kammer00629db2023-02-09 14:28:15 -0500335 aqueryHandler, err = newAqueryHandler(aqueryProto)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400336 if err != nil {
337 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500338 }
Liz Kammer690fbac2023-02-10 11:11:17 -0500339 }
340
Liz Kammer00629db2023-02-09 14:28:15 -0500341 buildStatements := make([]BuildStatement, 0, len(aqueryProto.Actions))
Liz Kammer690fbac2023-02-10 11:11:17 -0500342 {
343 eventHandler.Begin("build_statements")
344 defer eventHandler.End("build_statements")
Liz Kammer00629db2023-02-09 14:28:15 -0500345 var buildStatement *BuildStatement
346 for _, actionEntry := range aqueryProto.Actions {
347 buildStatement, err = aqueryHandler.actionToBuildStatement(actionEntry)
Liz Kammer690fbac2023-02-10 11:11:17 -0500348 if err != nil {
349 return nil, nil, err
350 }
Liz Kammer00629db2023-02-09 14:28:15 -0500351 if buildStatement == nil {
352 continue
353 }
354 buildStatements = append(buildStatements, *buildStatement)
Liz Kammer690fbac2023-02-10 11:11:17 -0500355 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500356 }
357
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400358 depsetsByHash := map[string]AqueryDepset{}
Liz Kammer00629db2023-02-09 14:28:15 -0500359 depsets := make([]AqueryDepset, 0, len(aqueryHandler.depsetIdToAqueryDepset))
Liz Kammer690fbac2023-02-10 11:11:17 -0500360 {
361 eventHandler.Begin("depsets")
362 defer eventHandler.End("depsets")
363 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
364 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
365 // Two depsets collide on hash. Ensure that their contents are identical.
366 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
367 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
368 }
369 } else {
370 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
371 depsets = append(depsets, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400372 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400373 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400374 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400375
Liz Kammer690fbac2023-02-10 11:11:17 -0500376 eventHandler.Do("build_statement_sort", func() {
377 // Build Statements and depsets must be sorted by their content hash to
378 // preserve determinism between builds (this will result in consistent ninja file
379 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
380 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
381 sort.Slice(buildStatements, func(i, j int) bool {
382 // For build statements, compare output lists. In Bazel, each output file
383 // may only have one action which generates it, so this will provide
384 // a deterministic ordering.
385 outputs_i := buildStatements[i].OutputPaths
386 outputs_j := buildStatements[j].OutputPaths
387 if len(outputs_i) != len(outputs_j) {
388 return len(outputs_i) < len(outputs_j)
389 }
390 if len(outputs_i) == 0 {
391 // No outputs for these actions, so compare commands.
392 return buildStatements[i].Command < buildStatements[j].Command
393 }
394 // There may be multiple outputs, but the output ordering is deterministic.
395 return outputs_i[0] < outputs_j[0]
396 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400397 })
Liz Kammer690fbac2023-02-10 11:11:17 -0500398 eventHandler.Do("depset_sort", func() {
399 sort.Slice(depsets, func(i, j int) bool {
400 return depsets[i].ContentHash < depsets[j].ContentHash
401 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400402 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400403 return buildStatements, depsets, nil
404}
405
// depsetContentHash returns the SHA256 checksum of a depset's contents,
// encoded as unpadded URL-safe base64. The checksum may serve as the depset's
// identifier; a content-derived identifier is superior for determinism. (An
// integer identifier depending on the order in which depsets are created, for
// example, would make depset IDs nondeterministic.)
//
// The hashed payload is the direct paths joined by newlines, immediately
// followed by the transitive depset hashes joined without any separator.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	// Newline serves as the path delimiter, as paths cannot contain newline.
	payload := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(payload))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
419
Liz Kammer00629db2023-02-09 14:28:15 -0500420func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []uint32) ([]string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400421 var hashes []string
Liz Kammer00629db2023-02-09 14:28:15 -0500422 for _, id := range inputDepsetIds {
423 dId := depsetId(id)
424 if aqueryDepset, exists := a.depsetIdToAqueryDepset[dId]; !exists {
425 if _, empty := a.emptyDepsetIds[dId]; !empty {
426 return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", dId)
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500427 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400428 } else {
429 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400430 }
431 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400432 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400433}
434
Liz Kammer00629db2023-02-09 14:28:15 -0500435func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400436 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400437 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400438 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500439 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400440 }
Usta Shresthac2372492022-05-27 10:45:00 -0400441 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400442 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500443 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400444 }
445
Liz Kammer00629db2023-02-09 14:28:15 -0500446 buildStatement := &BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400447 Command: command,
448 Depfile: depfile,
449 OutputPaths: outputPaths,
450 InputDepsetHashes: inputDepsetHashes,
451 Env: actionEntry.EnvironmentVariables,
452 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400453 }
454 return buildStatement, nil
455}
456
Liz Kammer00629db2023-02-09 14:28:15 -0500457func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Usta Shresthac2372492022-05-27 10:45:00 -0400458 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400459 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500460 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400461 }
462 if len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500463 return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400464 }
465 expandedTemplateContent := expandTemplateContent(actionEntry)
466 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
467 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
468 // change \n to space and mess up the format of Python programs.
469 // sed is used to convert \\n back to \n before saving to output file.
470 // See go/python-binary-host-mixed-build for more details.
471 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
472 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400473 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400474 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500475 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400476 }
477
Liz Kammer00629db2023-02-09 14:28:15 -0500478 buildStatement := &BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400479 Command: command,
480 Depfile: depfile,
481 OutputPaths: outputPaths,
482 InputDepsetHashes: inputDepsetHashes,
483 Env: actionEntry.EnvironmentVariables,
484 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400485 }
486 return buildStatement, nil
487}
488
Liz Kammer00629db2023-02-09 14:28:15 -0500489func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Sasha Smundak1da064c2022-06-08 16:36:16 -0700490 outputPaths, _, err := a.getOutputPaths(actionEntry)
491 var depsetHashes []string
492 if err == nil {
493 depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
494 }
495 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500496 return nil, err
Sasha Smundak1da064c2022-06-08 16:36:16 -0700497 }
Liz Kammer00629db2023-02-09 14:28:15 -0500498 return &BuildStatement{
Sasha Smundak1da064c2022-06-08 16:36:16 -0700499 Depfile: nil,
500 OutputPaths: outputPaths,
501 Env: actionEntry.EnvironmentVariables,
502 Mnemonic: actionEntry.Mnemonic,
503 InputDepsetHashes: depsetHashes,
504 FileContents: actionEntry.FileContents,
505 }, nil
506}
507
Liz Kammer00629db2023-02-09 14:28:15 -0500508func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700509 outputPaths, _, err := a.getOutputPaths(actionEntry)
510 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500511 return nil, err
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700512 }
513 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
514 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500515 return nil, err
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700516 }
517 if len(inputPaths) != 1 || len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500518 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700519 }
520 // The actual command is generated in bazelSingleton.GenerateBuildActions
Liz Kammer00629db2023-02-09 14:28:15 -0500521 return &BuildStatement{
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700522 Depfile: nil,
523 OutputPaths: outputPaths,
524 Env: actionEntry.EnvironmentVariables,
525 Mnemonic: actionEntry.Mnemonic,
526 InputPaths: inputPaths,
527 }, nil
528}
529
Liz Kammer00629db2023-02-09 14:28:15 -0500530func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Usta Shresthac2372492022-05-27 10:45:00 -0400531 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400532 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500533 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400534 }
535
Usta Shresthac2372492022-05-27 10:45:00 -0400536 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400537 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500538 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400539 }
540 if len(inputPaths) != 1 || len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500541 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400542 }
543 out := outputPaths[0]
544 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
545 out = proptools.ShellEscapeIncludingSpaces(out)
546 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
547 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
548 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
549 symlinkPaths := outputPaths[:]
550
Liz Kammer00629db2023-02-09 14:28:15 -0500551 buildStatement := &BuildStatement{
Chris Parsons1a7aca02022-04-25 22:35:15 -0400552 Command: command,
553 Depfile: depfile,
554 OutputPaths: outputPaths,
555 InputPaths: inputPaths,
556 Env: actionEntry.EnvironmentVariables,
557 Mnemonic: actionEntry.Mnemonic,
558 SymlinkPaths: symlinkPaths,
559 }
560 return buildStatement, nil
561}
562
Liz Kammer00629db2023-02-09 14:28:15 -0500563func (a *aqueryArtifactHandler) getOutputPaths(actionEntry *analysis_v2_proto.Action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400564 for _, outputId := range actionEntry.OutputIds {
Liz Kammer00629db2023-02-09 14:28:15 -0500565 outputPath, exists := a.artifactIdToPath[artifactId(outputId)]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400566 if !exists {
567 err = fmt.Errorf("undefined outputId %d", outputId)
568 return
569 }
570 ext := filepath.Ext(outputPath)
571 if ext == ".d" {
572 if depfile != nil {
573 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
574 return
575 } else {
576 depfile = &outputPath
577 }
578 } else {
579 outputPaths = append(outputPaths, outputPath)
580 }
581 }
582 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500583}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500584
Wei Li455ba832021-11-04 22:58:12 +0000585// expandTemplateContent substitutes the tokens in a template.
Liz Kammer00629db2023-02-09 14:28:15 -0500586func expandTemplateContent(actionEntry *analysis_v2_proto.Action) string {
587 replacerString := make([]string, len(actionEntry.Substitutions)*2)
588 for i, pair := range actionEntry.Substitutions {
Wei Li455ba832021-11-04 22:58:12 +0000589 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400590 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000591 value = val
592 }
Liz Kammer00629db2023-02-09 14:28:15 -0500593 replacerString[i*2] = pair.Key
594 replacerString[i*2+1] = value
Wei Li455ba832021-11-04 22:58:12 +0000595 }
596 replacer := strings.NewReplacer(replacerString...)
597 return replacer.Replace(actionEntry.TemplateContent)
598}
599
// commandLineArgumentReplacer performs the following single-pass
// substitutions:
//
//	\        ->  \\
//	$        ->  \$
//	`        ->  \`
//	"        ->  \"
//	newline  ->  \n (a backslash followed by the letter n)
//	'        ->  '"'"'
var commandLineArgumentReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"$", "\\$",
	"`", "\\`",
	"\"", "\\\"",
	"\n", `\n`,
	"'", `'"'"'`,
)

// escapeCommandlineArgument returns str with the substitutions of
// commandLineArgumentReplacer applied.
func escapeCommandlineArgument(str string) string {
	escaped := commandLineArgumentReplacer.Replace(str)
	return escaped
}
613
Liz Kammer00629db2023-02-09 14:28:15 -0500614func (a *aqueryArtifactHandler) actionToBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
615 switch actionEntry.Mnemonic {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400616 // Middleman actions are not handled like other actions; they are handled separately as a
617 // preparatory step so that their inputs may be relayed to actions depending on middleman
618 // artifacts.
Liz Kammer00629db2023-02-09 14:28:15 -0500619 case middlemanMnemonic:
620 return nil, nil
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700621 // PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
Liz Kammer00629db2023-02-09 14:28:15 -0500622 case "PythonZipper":
623 return nil, nil
Chris Parsons8d6e4332021-02-22 16:13:50 -0500624 // Skip "Fail" actions, which are placeholder actions designed to always fail.
Liz Kammer00629db2023-02-09 14:28:15 -0500625 case "Fail":
626 return nil, nil
627 case "BaselineCoverage":
628 return nil, nil
629 case "Symlink", "SolibSymlink", "ExecutableSymlink":
630 return a.symlinkActionBuildStatement(actionEntry)
631 case "TemplateExpand":
632 if len(actionEntry.Arguments) < 1 {
633 return a.templateExpandActionBuildStatement(actionEntry)
634 }
635 case "FileWrite", "SourceSymlinkManifest":
636 return a.fileWriteActionBuildStatement(actionEntry)
637 case "SymlinkTree":
638 return a.symlinkTreeActionBuildStatement(actionEntry)
Chris Parsons8d6e4332021-02-22 16:13:50 -0500639 }
Liz Kammer00629db2023-02-09 14:28:15 -0500640
641 if len(actionEntry.Arguments) < 1 {
642 return nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
Yu Liu8d82ac52022-05-17 15:13:28 -0700643 }
Liz Kammer00629db2023-02-09 14:28:15 -0500644 return a.normalActionBuildStatement(actionEntry)
645
Chris Parsons8d6e4332021-02-22 16:13:50 -0500646}
647
Liz Kammer00629db2023-02-09 14:28:15 -0500648func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]*analysis_v2_proto.PathFragment) (string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400649 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500650 currId := id
651 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
652 for currId > 0 {
653 currFragment, ok := pathFragmentsMap[currId]
654 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500655 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500656 }
657 labels = append([]string{currFragment.Label}, labels...)
Liz Kammer00629db2023-02-09 14:28:15 -0500658 parentId := pathFragmentId(currFragment.ParentId)
659 if currId == parentId {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700660 return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
Liz Kammerc49e6822021-06-08 15:04:11 -0400661 }
Liz Kammer00629db2023-02-09 14:28:15 -0500662 currId = parentId
Chris Parsonsaffbb602020-12-23 12:02:11 -0500663 }
664 return filepath.Join(labels...), nil
665}