blob: 4d39e8f55529bacd231cfacf78622a941d7bfd26 [file] [log] [blame]
// Copyright 2020 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package bazel
16
17import (
Chris Parsons0bfb1c02022-05-12 16:43:01 -040018 "crypto/sha256"
Usta Shrestha2ccdb422022-06-02 10:19:13 -040019 "encoding/base64"
Chris Parsonsaffbb602020-12-23 12:02:11 -050020 "fmt"
21 "path/filepath"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040022 "reflect"
Chris Parsons0bfb1c02022-05-12 16:43:01 -040023 "sort"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050024 "strings"
Liz Kammera4655a92023-02-10 17:17:28 -050025 "sync"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050026
Liz Kammer690fbac2023-02-10 11:11:17 -050027 analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
28
29 "github.com/google/blueprint/metrics"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050030 "github.com/google/blueprint/proptools"
Jason Wu118fd2b2022-10-27 18:41:15 +000031 "google.golang.org/protobuf/proto"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050032)
33
// artifactId is the integer identifier of an artifact in an aquery response.
type artifactId int

// depsetId is the integer identifier of a depset in an aquery response.
type depsetId int

// pathFragmentId is the integer identifier of a path fragment in an aquery
// response.
type pathFragmentId int
37
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050038// artifact contains relevant portions of Bazel's aquery proto, Artifact.
39// Represents a single artifact, whether it's a source file or a derived output file.
40type artifact struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040041 Id artifactId
42 PathFragmentId pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050043}
44
45type pathFragment struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040046 Id pathFragmentId
Chris Parsonsaffbb602020-12-23 12:02:11 -050047 Label string
Usta Shrestha6298cc52022-05-27 17:40:21 -040048 ParentId pathFragmentId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050049}
50
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
type KeyValuePair struct {
	Key   string
	Value string
}
56
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset by a hash of its contents.
	ContentHash string
	// DirectArtifacts are the full paths of the artifacts held directly by
	// this depset.
	DirectArtifacts []string
	// TransitiveDepSetHashes are the content hashes of the child depsets.
	TransitiveDepSetHashes []string
}
70
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050071// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
72// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
73// data structure for storing large numbers of file paths.
74type depSetOfFiles struct {
Usta Shrestha6298cc52022-05-27 17:40:21 -040075 Id depsetId
76 DirectArtifactIds []artifactId
77 TransitiveDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050078}
79
80// action contains relevant portions of Bazel's aquery proto, Action.
81// Represents a single command line invocation in the Bazel build graph.
82type action struct {
83 Arguments []string
84 EnvironmentVariables []KeyValuePair
Usta Shrestha6298cc52022-05-27 17:40:21 -040085 InputDepSetIds []depsetId
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050086 Mnemonic string
Usta Shrestha6298cc52022-05-27 17:40:21 -040087 OutputIds []artifactId
Wei Li455ba832021-11-04 22:58:12 +000088 TemplateContent string
89 Substitutions []KeyValuePair
Sasha Smundak1da064c2022-06-08 16:36:16 -070090 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050091}
92
93// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
94// An aquery response from Bazel contains a single ActionGraphContainer proto.
95type actionGraphContainer struct {
96 Artifacts []artifact
97 Actions []action
98 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050099 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500100}
101
102// BuildStatement contains information to register a build statement corresponding (one to one)
103// with a Bazel action from Bazel's action graph.
104type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -0400105 Command string
106 Depfile *string
107 OutputPaths []string
Liz Kammerc49e6822021-06-08 15:04:11 -0400108 SymlinkPaths []string
Liz Kammer00629db2023-02-09 14:28:15 -0500109 Env []*analysis_v2_proto.KeyValuePair
Liz Kammerc49e6822021-06-08 15:04:11 -0400110 Mnemonic string
Chris Parsons1a7aca02022-04-25 22:35:15 -0400111
112 // Inputs of this build statement, either as unexpanded depsets or expanded
113 // input paths. There should be no overlap between these fields; an input
114 // path should either be included as part of an unexpanded depset or a raw
115 // input path string, but not both.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400116 InputDepsetHashes []string
117 InputPaths []string
Sasha Smundak1da064c2022-06-08 16:36:16 -0700118 FileContents string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500119}
120
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400121// A helper type for aquery processing which facilitates retrieval of path IDs from their
122// less readable Bazel structures (depset and path fragment).
123type aqueryArtifactHandler struct {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400124 // Maps depset id to AqueryDepset, a representation of depset which is
125 // post-processed for middleman artifact handling, unhandled artifact
126 // dropping, content hashing, etc.
Usta Shrestha6298cc52022-05-27 17:40:21 -0400127 depsetIdToAqueryDepset map[depsetId]AqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500128 emptyDepsetIds map[depsetId]struct{}
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400129 // Maps content hash to AqueryDepset.
130 depsetHashToAqueryDepset map[string]AqueryDepset
131
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400132 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
133 // may be an expensive operation.
Liz Kammera4655a92023-02-10 17:17:28 -0500134 depsetHashToArtifactPathsCache sync.Map
Usta Shrestha6298cc52022-05-27 17:40:21 -0400135 // Maps artifact ids to fully expanded paths.
136 artifactIdToPath map[artifactId]string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400137}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500138
// The tokens should be substituted with the value specified here, instead of the
// one returned in 'substitutions' of TemplateExpand action.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
146
const (
	// The mnemonic of the actions whose outputs are middleman artifacts.
	middlemanMnemonic = "Middleman"
	// The file name of py3wrapper.sh, which is used by py_binary targets.
	py3wrapperFileName = "/py3wrapper.sh"
)
Wei Li455ba832021-11-04 22:58:12 +0000152
// indexBy returns a map from keyFn(v) to v for every v in values. When several
// values yield the same key, the one latest in the slice is kept. The returned
// map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V, len(values))
	for _, v := range values {
		index[keyFn(v)] = v
	}
	return index
}
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400160
Liz Kammer00629db2023-02-09 14:28:15 -0500161func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aqueryArtifactHandler, error) {
162 pathFragments := indexBy(aqueryResult.PathFragments, func(pf *analysis_v2_proto.PathFragment) pathFragmentId {
163 return pathFragmentId(pf.Id)
Usta Shrestha6298cc52022-05-27 17:40:21 -0400164 })
165
Liz Kammer00629db2023-02-09 14:28:15 -0500166 artifactIdToPath := make(map[artifactId]string, len(aqueryResult.Artifacts))
Chris Parsonsaffbb602020-12-23 12:02:11 -0500167 for _, artifact := range aqueryResult.Artifacts {
Liz Kammer00629db2023-02-09 14:28:15 -0500168 artifactPath, err := expandPathFragment(pathFragmentId(artifact.PathFragmentId), pathFragments)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500169 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500170 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500171 }
Liz Kammer00629db2023-02-09 14:28:15 -0500172 artifactIdToPath[artifactId(artifact.Id)] = artifactPath
Chris Parsonsaffbb602020-12-23 12:02:11 -0500173 }
Chris Parsons943f2432021-01-19 11:36:50 -0500174
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400175 // Map middleman artifact ContentHash to input artifact depset ID.
Chris Parsons1a7aca02022-04-25 22:35:15 -0400176 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
Usta Shrestha16ac1352022-06-22 11:01:55 -0400177 // if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400178 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
179 // that action instead.
Liz Kammer00629db2023-02-09 14:28:15 -0500180 middlemanIdToDepsetIds := map[artifactId][]uint32{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500181 for _, actionEntry := range aqueryResult.Actions {
Liz Kammer00629db2023-02-09 14:28:15 -0500182 if actionEntry.Mnemonic == middlemanMnemonic {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500183 for _, outputId := range actionEntry.OutputIds {
Liz Kammer00629db2023-02-09 14:28:15 -0500184 middlemanIdToDepsetIds[artifactId(outputId)] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500185 }
186 }
187 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400188
Liz Kammer00629db2023-02-09 14:28:15 -0500189 depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d *analysis_v2_proto.DepSetOfFiles) depsetId {
190 return depsetId(d.Id)
Usta Shrestha6298cc52022-05-27 17:40:21 -0400191 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400192
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400193 aqueryHandler := aqueryArtifactHandler{
Usta Shrestha6298cc52022-05-27 17:40:21 -0400194 depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400195 depsetHashToAqueryDepset: map[string]AqueryDepset{},
Liz Kammera4655a92023-02-10 17:17:28 -0500196 depsetHashToArtifactPathsCache: sync.Map{},
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500197 emptyDepsetIds: make(map[depsetId]struct{}, 0),
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400198 artifactIdToPath: artifactIdToPath,
199 }
200
201 // Validate and adjust aqueryResult.DepSetOfFiles values.
202 for _, depset := range aqueryResult.DepSetOfFiles {
203 _, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
204 if err != nil {
205 return nil, err
206 }
207 }
208
209 return &aqueryHandler, nil
210}
211
212// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
213// depset.
Liz Kammer00629db2023-02-09 14:28:15 -0500214func (a *aqueryArtifactHandler) populateDepsetMaps(depset *analysis_v2_proto.DepSetOfFiles, middlemanIdToDepsetIds map[artifactId][]uint32, depsetIdToDepset map[depsetId]*analysis_v2_proto.DepSetOfFiles) (*AqueryDepset, error) {
215 if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depsetId(depset.Id)]; containsDepset {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500216 return &aqueryDepset, nil
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400217 }
218 transitiveDepsetIds := depset.TransitiveDepSetIds
Liz Kammer00629db2023-02-09 14:28:15 -0500219 directArtifactPaths := make([]string, 0, len(depset.DirectArtifactIds))
220 for _, id := range depset.DirectArtifactIds {
221 aId := artifactId(id)
222 path, pathExists := a.artifactIdToPath[aId]
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400223 if !pathExists {
Liz Kammer00629db2023-02-09 14:28:15 -0500224 return nil, fmt.Errorf("undefined input artifactId %d", aId)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400225 }
226 // Filter out any inputs which are universally dropped, and swap middleman
227 // artifacts with their corresponding depsets.
Liz Kammer00629db2023-02-09 14:28:15 -0500228 if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[aId]; isMiddleman {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400229 // Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
230 transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
Usta Shresthaef922252022-06-02 14:23:02 -0400231 } else if strings.HasSuffix(path, py3wrapperFileName) ||
Usta Shresthaef922252022-06-02 14:23:02 -0400232 strings.HasPrefix(path, "../bazel_tools") {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500233 continue
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400234 // Drop these artifacts.
235 // See go/python-binary-host-mixed-build for more details.
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700236 // 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
237 // TemplateExpandAction handles everything necessary to launch a Pythin application.
238 // 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
Usta Shresthaef922252022-06-02 14:23:02 -0400239 // containing depset to always be considered newer than their outputs.
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400240 } else {
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400241 directArtifactPaths = append(directArtifactPaths, path)
242 }
243 }
244
Liz Kammer00629db2023-02-09 14:28:15 -0500245 childDepsetHashes := make([]string, 0, len(transitiveDepsetIds))
246 for _, id := range transitiveDepsetIds {
247 childDepsetId := depsetId(id)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400248 childDepset, exists := depsetIdToDepset[childDepsetId]
249 if !exists {
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500250 if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
251 continue
252 } else {
253 return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
254 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400255 }
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500256 if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
257 return nil, err
258 } else if childAqueryDepset == nil {
259 continue
260 } else {
261 childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400262 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400263 }
Usta Shresthaef922252022-06-02 14:23:02 -0400264 if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
Liz Kammer00629db2023-02-09 14:28:15 -0500265 a.emptyDepsetIds[depsetId(depset.Id)] = struct{}{}
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500266 return nil, nil
Usta Shresthaef922252022-06-02 14:23:02 -0400267 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400268 aqueryDepset := AqueryDepset{
269 ContentHash: depsetContentHash(directArtifactPaths, childDepsetHashes),
270 DirectArtifacts: directArtifactPaths,
271 TransitiveDepSetHashes: childDepsetHashes,
272 }
Liz Kammer00629db2023-02-09 14:28:15 -0500273 a.depsetIdToAqueryDepset[depsetId(depset.Id)] = aqueryDepset
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400274 a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500275 return &aqueryDepset, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400276}
277
Chris Parsons1a7aca02022-04-25 22:35:15 -0400278// getInputPaths flattens the depsets of the given IDs and returns all transitive
279// input paths contained in these depsets.
280// This is a potentially expensive operation, and should not be invoked except
281// for actions which need specialized input handling.
Liz Kammer00629db2023-02-09 14:28:15 -0500282func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400283 var inputPaths []string
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400284
Liz Kammer00629db2023-02-09 14:28:15 -0500285 for _, id := range depsetIds {
286 inputDepSetId := depsetId(id)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400287 depset := a.depsetIdToAqueryDepset[inputDepSetId]
288 inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400289 if err != nil {
290 return nil, err
291 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400292 for _, inputPath := range inputArtifacts {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400293 inputPaths = append(inputPaths, inputPath)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400294 }
295 }
Wei Li455ba832021-11-04 22:58:12 +0000296
Chris Parsons1a7aca02022-04-25 22:35:15 -0400297 return inputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400298}
299
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400300func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
Liz Kammera4655a92023-02-10 17:17:28 -0500301 if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
302 return result.([]string), nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400303 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400304 if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
305 result := depset.DirectArtifacts
306 for _, childHash := range depset.TransitiveDepSetHashes {
307 childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400308 if err != nil {
309 return nil, err
310 }
311 result = append(result, childArtifactIds...)
312 }
Liz Kammera4655a92023-02-10 17:17:28 -0500313 a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400314 return result, nil
315 } else {
Usta Shrestha2ccdb422022-06-02 10:19:13 -0400316 return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400317 }
318}
319
Chris Parsons1a7aca02022-04-25 22:35:15 -0400320// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
Usta Shrestha6298cc52022-05-27 17:40:21 -0400321// which should be registered (and output to a ninja file) to correspond with Bazel's
Chris Parsons1a7aca02022-04-25 22:35:15 -0400322// action graph, as described by the given action graph json proto.
323// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
324// are one-to-one with Bazel's depSetOfFiles objects.
Liz Kammera4655a92023-02-10 17:17:28 -0500325func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
Jason Wu118fd2b2022-10-27 18:41:15 +0000326 aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
327 err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400328 if err != nil {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400329 return nil, nil, err
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400330 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500331
Liz Kammer690fbac2023-02-10 11:11:17 -0500332 var aqueryHandler *aqueryArtifactHandler
333 {
334 eventHandler.Begin("init_handler")
335 defer eventHandler.End("init_handler")
Liz Kammer00629db2023-02-09 14:28:15 -0500336 aqueryHandler, err = newAqueryHandler(aqueryProto)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400337 if err != nil {
338 return nil, nil, err
Chris Parsons8d6e4332021-02-22 16:13:50 -0500339 }
Liz Kammer690fbac2023-02-10 11:11:17 -0500340 }
341
Liz Kammera4655a92023-02-10 17:17:28 -0500342 // allocate both length and capacity so each goroutine can write to an index independently without
343 // any need for synchronization for slice access.
344 buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
Liz Kammer690fbac2023-02-10 11:11:17 -0500345 {
346 eventHandler.Begin("build_statements")
347 defer eventHandler.End("build_statements")
Liz Kammera4655a92023-02-10 17:17:28 -0500348 wg := sync.WaitGroup{}
349 var errOnce sync.Once
350
351 for i, actionEntry := range aqueryProto.Actions {
352 wg.Add(1)
353 go func(i int, actionEntry *analysis_v2_proto.Action) {
354 buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry)
355 if aErr != nil {
356 errOnce.Do(func() {
357 err = aErr
358 })
359 } else {
360 // set build statement at an index rather than appending such that each goroutine does not
361 // impact other goroutines
362 buildStatements[i] = buildStatement
363 }
364 wg.Done()
365 }(i, actionEntry)
Liz Kammer690fbac2023-02-10 11:11:17 -0500366 }
Liz Kammera4655a92023-02-10 17:17:28 -0500367 wg.Wait()
368 }
369 if err != nil {
370 return nil, nil, err
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500371 }
372
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400373 depsetsByHash := map[string]AqueryDepset{}
Liz Kammer00629db2023-02-09 14:28:15 -0500374 depsets := make([]AqueryDepset, 0, len(aqueryHandler.depsetIdToAqueryDepset))
Liz Kammer690fbac2023-02-10 11:11:17 -0500375 {
376 eventHandler.Begin("depsets")
377 defer eventHandler.End("depsets")
378 for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
379 if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
380 // Two depsets collide on hash. Ensure that their contents are identical.
381 if !reflect.DeepEqual(aqueryDepset, prevEntry) {
382 return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
383 }
384 } else {
385 depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
386 depsets = append(depsets, aqueryDepset)
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400387 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400388 }
Chris Parsons1a7aca02022-04-25 22:35:15 -0400389 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400390
Liz Kammer690fbac2023-02-10 11:11:17 -0500391 eventHandler.Do("build_statement_sort", func() {
392 // Build Statements and depsets must be sorted by their content hash to
393 // preserve determinism between builds (this will result in consistent ninja file
394 // output). Note they are not sorted by their original IDs nor their Bazel ordering,
395 // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
396 sort.Slice(buildStatements, func(i, j int) bool {
Liz Kammera4655a92023-02-10 17:17:28 -0500397 // Sort all nil statements to the end of the slice
398 if buildStatements[i] == nil {
399 return false
400 } else if buildStatements[j] == nil {
401 return true
402 }
403 //For build statements, compare output lists. In Bazel, each output file
Liz Kammer690fbac2023-02-10 11:11:17 -0500404 // may only have one action which generates it, so this will provide
405 // a deterministic ordering.
406 outputs_i := buildStatements[i].OutputPaths
407 outputs_j := buildStatements[j].OutputPaths
408 if len(outputs_i) != len(outputs_j) {
409 return len(outputs_i) < len(outputs_j)
410 }
411 if len(outputs_i) == 0 {
412 // No outputs for these actions, so compare commands.
413 return buildStatements[i].Command < buildStatements[j].Command
414 }
415 // There may be multiple outputs, but the output ordering is deterministic.
416 return outputs_i[0] < outputs_j[0]
417 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400418 })
Liz Kammer690fbac2023-02-10 11:11:17 -0500419 eventHandler.Do("depset_sort", func() {
420 sort.Slice(depsets, func(i, j int) bool {
421 return depsets[i].ContentHash < depsets[j].ContentHash
422 })
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400423 })
Chris Parsons1a7aca02022-04-25 22:35:15 -0400424 return buildStatements, depsets, nil
425}
426
// depsetContentHash computes and returns a SHA256 checksum of the contents of
// the given depset. This content hash may serve as the depset's identifier.
// Using a content hash for an identifier is superior for determinism. (For example,
// using an integer identifier which depends on the order in which the depsets are
// created would result in nondeterministic depset IDs.)
//
// The checksum is returned in unpadded, URL-safe base64.
//
// NOTE(review): nothing separates the direct paths from the transitive hashes
// in the hashed data, and the hashes are joined without a delimiter. Adding
// separators would change every content hash, so this is left as is.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	h := sha256.New()
	// Use newline as delimiter, as paths cannot contain newline.
	h.Write([]byte(strings.Join(directPaths, "\n")))
	h.Write([]byte(strings.Join(transitiveDepsetHashes, "")))
	return base64.RawURLEncoding.EncodeToString(h.Sum(nil))
}
440
Liz Kammer00629db2023-02-09 14:28:15 -0500441func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []uint32) ([]string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400442 var hashes []string
Liz Kammer00629db2023-02-09 14:28:15 -0500443 for _, id := range inputDepsetIds {
444 dId := depsetId(id)
445 if aqueryDepset, exists := a.depsetIdToAqueryDepset[dId]; !exists {
446 if _, empty := a.emptyDepsetIds[dId]; !empty {
447 return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", dId)
Usta Shrestha13fd5ae2023-01-27 10:55:34 -0500448 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400449 } else {
450 hashes = append(hashes, aqueryDepset.ContentHash)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400451 }
452 }
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400453 return hashes, nil
Chris Parsons1a7aca02022-04-25 22:35:15 -0400454}
455
Liz Kammer00629db2023-02-09 14:28:15 -0500456func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400457 command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
Usta Shresthac2372492022-05-27 10:45:00 -0400458 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400459 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500460 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400461 }
Usta Shresthac2372492022-05-27 10:45:00 -0400462 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400463 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500464 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400465 }
466
Liz Kammer00629db2023-02-09 14:28:15 -0500467 buildStatement := &BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400468 Command: command,
469 Depfile: depfile,
470 OutputPaths: outputPaths,
471 InputDepsetHashes: inputDepsetHashes,
472 Env: actionEntry.EnvironmentVariables,
473 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400474 }
475 return buildStatement, nil
476}
477
Liz Kammer00629db2023-02-09 14:28:15 -0500478func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Usta Shresthac2372492022-05-27 10:45:00 -0400479 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400480 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500481 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400482 }
483 if len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500484 return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400485 }
486 expandedTemplateContent := expandTemplateContent(actionEntry)
487 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
488 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
489 // change \n to space and mess up the format of Python programs.
490 // sed is used to convert \\n back to \n before saving to output file.
491 // See go/python-binary-host-mixed-build for more details.
492 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
493 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
Usta Shresthac2372492022-05-27 10:45:00 -0400494 inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400495 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500496 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400497 }
498
Liz Kammer00629db2023-02-09 14:28:15 -0500499 buildStatement := &BuildStatement{
Chris Parsons0bfb1c02022-05-12 16:43:01 -0400500 Command: command,
501 Depfile: depfile,
502 OutputPaths: outputPaths,
503 InputDepsetHashes: inputDepsetHashes,
504 Env: actionEntry.EnvironmentVariables,
505 Mnemonic: actionEntry.Mnemonic,
Chris Parsons1a7aca02022-04-25 22:35:15 -0400506 }
507 return buildStatement, nil
508}
509
Liz Kammer00629db2023-02-09 14:28:15 -0500510func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Sasha Smundak1da064c2022-06-08 16:36:16 -0700511 outputPaths, _, err := a.getOutputPaths(actionEntry)
512 var depsetHashes []string
513 if err == nil {
514 depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
515 }
516 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500517 return nil, err
Sasha Smundak1da064c2022-06-08 16:36:16 -0700518 }
Liz Kammer00629db2023-02-09 14:28:15 -0500519 return &BuildStatement{
Sasha Smundak1da064c2022-06-08 16:36:16 -0700520 Depfile: nil,
521 OutputPaths: outputPaths,
522 Env: actionEntry.EnvironmentVariables,
523 Mnemonic: actionEntry.Mnemonic,
524 InputDepsetHashes: depsetHashes,
525 FileContents: actionEntry.FileContents,
526 }, nil
527}
528
Liz Kammer00629db2023-02-09 14:28:15 -0500529func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700530 outputPaths, _, err := a.getOutputPaths(actionEntry)
531 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500532 return nil, err
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700533 }
534 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
535 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500536 return nil, err
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700537 }
538 if len(inputPaths) != 1 || len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500539 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700540 }
541 // The actual command is generated in bazelSingleton.GenerateBuildActions
Liz Kammer00629db2023-02-09 14:28:15 -0500542 return &BuildStatement{
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700543 Depfile: nil,
544 OutputPaths: outputPaths,
545 Env: actionEntry.EnvironmentVariables,
546 Mnemonic: actionEntry.Mnemonic,
547 InputPaths: inputPaths,
548 }, nil
549}
550
Liz Kammer00629db2023-02-09 14:28:15 -0500551func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
Usta Shresthac2372492022-05-27 10:45:00 -0400552 outputPaths, depfile, err := a.getOutputPaths(actionEntry)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400553 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500554 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400555 }
556
Usta Shresthac2372492022-05-27 10:45:00 -0400557 inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400558 if err != nil {
Liz Kammer00629db2023-02-09 14:28:15 -0500559 return nil, err
Chris Parsons1a7aca02022-04-25 22:35:15 -0400560 }
561 if len(inputPaths) != 1 || len(outputPaths) != 1 {
Liz Kammer00629db2023-02-09 14:28:15 -0500562 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
Chris Parsons1a7aca02022-04-25 22:35:15 -0400563 }
564 out := outputPaths[0]
565 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
566 out = proptools.ShellEscapeIncludingSpaces(out)
567 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
568 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
569 command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
570 symlinkPaths := outputPaths[:]
571
Liz Kammer00629db2023-02-09 14:28:15 -0500572 buildStatement := &BuildStatement{
Chris Parsons1a7aca02022-04-25 22:35:15 -0400573 Command: command,
574 Depfile: depfile,
575 OutputPaths: outputPaths,
576 InputPaths: inputPaths,
577 Env: actionEntry.EnvironmentVariables,
578 Mnemonic: actionEntry.Mnemonic,
579 SymlinkPaths: symlinkPaths,
580 }
581 return buildStatement, nil
582}
583
Liz Kammer00629db2023-02-09 14:28:15 -0500584func (a *aqueryArtifactHandler) getOutputPaths(actionEntry *analysis_v2_proto.Action) (outputPaths []string, depfile *string, err error) {
Chris Parsons1a7aca02022-04-25 22:35:15 -0400585 for _, outputId := range actionEntry.OutputIds {
Liz Kammer00629db2023-02-09 14:28:15 -0500586 outputPath, exists := a.artifactIdToPath[artifactId(outputId)]
Chris Parsons1a7aca02022-04-25 22:35:15 -0400587 if !exists {
588 err = fmt.Errorf("undefined outputId %d", outputId)
589 return
590 }
591 ext := filepath.Ext(outputPath)
592 if ext == ".d" {
593 if depfile != nil {
594 err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
595 return
596 } else {
597 depfile = &outputPath
598 }
599 } else {
600 outputPaths = append(outputPaths, outputPath)
601 }
602 }
603 return
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500604}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500605
Wei Li455ba832021-11-04 22:58:12 +0000606// expandTemplateContent substitutes the tokens in a template.
Liz Kammer00629db2023-02-09 14:28:15 -0500607func expandTemplateContent(actionEntry *analysis_v2_proto.Action) string {
608 replacerString := make([]string, len(actionEntry.Substitutions)*2)
609 for i, pair := range actionEntry.Substitutions {
Wei Li455ba832021-11-04 22:58:12 +0000610 value := pair.Value
Usta Shrestha6298cc52022-05-27 17:40:21 -0400611 if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
Wei Li455ba832021-11-04 22:58:12 +0000612 value = val
613 }
Liz Kammer00629db2023-02-09 14:28:15 -0500614 replacerString[i*2] = pair.Key
615 replacerString[i*2+1] = value
Wei Li455ba832021-11-04 22:58:12 +0000616 }
617 replacer := strings.NewReplacer(replacerString...)
618 return replacer.Replace(actionEntry.TemplateContent)
619}
620
// commandLineArgumentReplacer rewrites the characters handled by
// escapeCommandlineArgument. It is built once at package scope so the
// replacement table is not rebuilt on every call. The mappings are:
//
//	backslash    -> two backslashes
//	dollar sign  -> backslash, dollar sign
//	backtick     -> backslash, backtick
//	double quote -> backslash, double quote
//	newline      -> backslash followed by the letter n
//	single quote -> '"'"'
var commandLineArgumentReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"$", "\\$",
	"`", "\\`",
	"\"", "\\\"",
	"\n", "\\n",
	"'", "'\"'\"'",
)

// escapeCommandlineArgument returns str with the characters listed on
// commandLineArgumentReplacer escaped. Each character of the input is
// considered once; text produced by a replacement is not escaped again.
func escapeCommandlineArgument(str string) string {
	escaped := commandLineArgumentReplacer.Replace(str)
	return escaped
}
634
Liz Kammer00629db2023-02-09 14:28:15 -0500635func (a *aqueryArtifactHandler) actionToBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
636 switch actionEntry.Mnemonic {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400637 // Middleman actions are not handled like other actions; they are handled separately as a
638 // preparatory step so that their inputs may be relayed to actions depending on middleman
639 // artifacts.
Liz Kammer00629db2023-02-09 14:28:15 -0500640 case middlemanMnemonic:
641 return nil, nil
Sasha Smundakc180dbd2022-07-03 14:55:58 -0700642 // PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
Liz Kammer00629db2023-02-09 14:28:15 -0500643 case "PythonZipper":
644 return nil, nil
Chris Parsons8d6e4332021-02-22 16:13:50 -0500645 // Skip "Fail" actions, which are placeholder actions designed to always fail.
Liz Kammer00629db2023-02-09 14:28:15 -0500646 case "Fail":
647 return nil, nil
648 case "BaselineCoverage":
649 return nil, nil
650 case "Symlink", "SolibSymlink", "ExecutableSymlink":
651 return a.symlinkActionBuildStatement(actionEntry)
652 case "TemplateExpand":
653 if len(actionEntry.Arguments) < 1 {
654 return a.templateExpandActionBuildStatement(actionEntry)
655 }
656 case "FileWrite", "SourceSymlinkManifest":
657 return a.fileWriteActionBuildStatement(actionEntry)
658 case "SymlinkTree":
659 return a.symlinkTreeActionBuildStatement(actionEntry)
Chris Parsons8d6e4332021-02-22 16:13:50 -0500660 }
Liz Kammer00629db2023-02-09 14:28:15 -0500661
662 if len(actionEntry.Arguments) < 1 {
663 return nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
Yu Liu8d82ac52022-05-17 15:13:28 -0700664 }
Liz Kammer00629db2023-02-09 14:28:15 -0500665 return a.normalActionBuildStatement(actionEntry)
666
Chris Parsons8d6e4332021-02-22 16:13:50 -0500667}
668
Liz Kammer00629db2023-02-09 14:28:15 -0500669func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]*analysis_v2_proto.PathFragment) (string, error) {
Usta Shrestha6298cc52022-05-27 17:40:21 -0400670 var labels []string
Chris Parsonsaffbb602020-12-23 12:02:11 -0500671 currId := id
672 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
673 for currId > 0 {
674 currFragment, ok := pathFragmentsMap[currId]
675 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500676 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500677 }
678 labels = append([]string{currFragment.Label}, labels...)
Liz Kammer00629db2023-02-09 14:28:15 -0500679 parentId := pathFragmentId(currFragment.ParentId)
680 if currId == parentId {
Sasha Smundakfe9a5b82022-07-27 14:51:45 -0700681 return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
Liz Kammerc49e6822021-06-08 15:04:11 -0400682 }
Liz Kammer00629db2023-02-09 14:28:15 -0500683 currId = parentId
Chris Parsonsaffbb602020-12-23 12:02:11 -0500684 }
685 return filepath.Join(labels...), nil
686}