blob: 3ce86ce72953048da7a0cf97de16a65d599834d5 [file] [log] [blame]
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -05001// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
18 "encoding/json"
Chris Parsonsaffbb602020-12-23 12:02:11 -050019 "fmt"
20 "path/filepath"
Wei Li664a4fd2021-10-07 06:26:48 +000021 "regexp"
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050022 "strings"
23
24 "github.com/google/blueprint/proptools"
25)
26
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
type artifact struct {
	// Id is the key by which depsets and actions refer to this artifact.
	Id int
	// PathFragmentId is the Id of the pathFragment from which this artifact's full path
	// is expanded.
	PathFragmentId int
}
33
// pathFragment is one component of a file path. Fragments are chained through ParentId, and
// a full path is rebuilt by walking from a fragment up through its parents.
type pathFragment struct {
	// Id is the key by which artifacts and child fragments refer to this fragment.
	Id int
	// Label is the text of this path component.
	Label string
	// ParentId is the Id of the fragment that precedes this one in the path. A value that is
	// not positive ends the chain.
	ParentId int
}
39
// KeyValuePair represents Bazel's aquery proto, KeyValuePair. In this file it carries both
// the environment variables of an action and the substitutions of a template.
type KeyValuePair struct {
	// Key is the name of the entry: a variable name, or the token to replace.
	Key string
	// Value is the content associated with Key.
	Value string
}
45
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
type depSetOfFiles struct {
	// Id is the key by which actions and parent depsets refer to this depset.
	Id int
	// DirectArtifactIds lists the artifacts that belong to this depset itself.
	DirectArtifactIds []int
	// TransitiveDepSetIds lists child depsets whose artifacts are also part of this depset.
	TransitiveDepSetIds []int
}
54
55// action contains relevant portions of Bazel's aquery proto, Action.
56// Represents a single command line invocation in the Bazel build graph.
57type action struct {
58 Arguments []string
59 EnvironmentVariables []KeyValuePair
Chris Parsonsaffbb602020-12-23 12:02:11 -050060 InputDepSetIds []int
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050061 Mnemonic string
Chris Parsonsaffbb602020-12-23 12:02:11 -050062 OutputIds []int
Wei Li664a4fd2021-10-07 06:26:48 +000063 TemplateContent string
64 Substitutions []KeyValuePair
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050065}
66
67// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
68// An aquery response from Bazel contains a single ActionGraphContainer proto.
69type actionGraphContainer struct {
70 Artifacts []artifact
71 Actions []action
72 DepSetOfFiles []depSetOfFiles
Chris Parsonsaffbb602020-12-23 12:02:11 -050073 PathFragments []pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050074}
75
76// BuildStatement contains information to register a build statement corresponding (one to one)
77// with a Bazel action from Bazel's action graph.
78type BuildStatement struct {
Liz Kammerc49e6822021-06-08 15:04:11 -040079 Command string
80 Depfile *string
81 OutputPaths []string
82 InputPaths []string
83 SymlinkPaths []string
84 Env []KeyValuePair
85 Mnemonic string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -050086}
87
Chris Parsonsc4fb1332021-05-18 12:31:25 -040088// A helper type for aquery processing which facilitates retrieval of path IDs from their
89// less readable Bazel structures (depset and path fragment).
90type aqueryArtifactHandler struct {
91 // Maps middleman artifact Id to input artifact depset ID.
92 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
93 // if we find a middleman action which has outputs [foo, bar], and output [baz_middleman], then,
94 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
95 // that action instead.
96 middlemanIdToDepsetIds map[int][]int
97 // Maps depset Id to depset struct.
98 depsetIdToDepset map[int]depSetOfFiles
99 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
100 // may be an expensive operation.
101 depsetIdToArtifactIdsCache map[int][]int
102 // Maps artifact Id to fully expanded path.
103 artifactIdToPath map[int]string
104}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500105
// TemplateActionOverriddenTokens maps template tokens to the value they should be substituted
// with, instead of the one returned in 'substitutions' of TemplateExpand action.
var TemplateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
113
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400114func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
Chris Parsonsaffbb602020-12-23 12:02:11 -0500115 pathFragments := map[int]pathFragment{}
116 for _, pathFragment := range aqueryResult.PathFragments {
117 pathFragments[pathFragment.Id] = pathFragment
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500118 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400119
Chris Parsonsaffbb602020-12-23 12:02:11 -0500120 artifactIdToPath := map[int]string{}
121 for _, artifact := range aqueryResult.Artifacts {
122 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
123 if err != nil {
Chris Parsons4f069892021-01-15 12:22:41 -0500124 return nil, err
Chris Parsonsaffbb602020-12-23 12:02:11 -0500125 }
126 artifactIdToPath[artifact.Id] = artifactPath
127 }
Chris Parsons943f2432021-01-19 11:36:50 -0500128
129 depsetIdToDepset := map[int]depSetOfFiles{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500130 for _, depset := range aqueryResult.DepSetOfFiles {
Chris Parsons943f2432021-01-19 11:36:50 -0500131 depsetIdToDepset[depset.Id] = depset
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500132 }
133
Chris Parsons8d6e4332021-02-22 16:13:50 -0500134 // Do a pass through all actions to identify which artifacts are middleman artifacts.
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400135 middlemanIdToDepsetIds := map[int][]int{}
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500136 for _, actionEntry := range aqueryResult.Actions {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500137 if actionEntry.Mnemonic == "Middleman" {
138 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400139 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
Chris Parsons8d6e4332021-02-22 16:13:50 -0500140 }
141 }
142 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400143 return &aqueryArtifactHandler{
144 middlemanIdToDepsetIds: middlemanIdToDepsetIds,
145 depsetIdToDepset: depsetIdToDepset,
146 depsetIdToArtifactIdsCache: map[int][]int{},
147 artifactIdToPath: artifactIdToPath,
148 }, nil
149}
150
151func (a *aqueryArtifactHandler) getInputPaths(depsetIds []int) ([]string, error) {
152 inputPaths := []string{}
153
154 for _, inputDepSetId := range depsetIds {
155 inputArtifacts, err := a.artifactIdsFromDepsetId(inputDepSetId)
156 if err != nil {
157 return nil, err
158 }
159 for _, inputId := range inputArtifacts {
160 if middlemanInputDepsetIds, isMiddlemanArtifact := a.middlemanIdToDepsetIds[inputId]; isMiddlemanArtifact {
161 // Add all inputs from middleman actions which created middleman artifacts which are
162 // in the inputs for this action.
163 swappedInputPaths, err := a.getInputPaths(middlemanInputDepsetIds)
164 if err != nil {
165 return nil, err
166 }
167 inputPaths = append(inputPaths, swappedInputPaths...)
168 } else {
169 inputPath, exists := a.artifactIdToPath[inputId]
170 if !exists {
171 return nil, fmt.Errorf("undefined input artifactId %d", inputId)
172 }
173 inputPaths = append(inputPaths, inputPath)
174 }
175 }
176 }
Wei Li664a4fd2021-10-07 06:26:48 +0000177
178 // Filter out py3wrapper.sh & MANIFEST file. The middleman action returned by aquery
179 // for python binary is the input list for a dependent of python binary, since py3wrapper.sh
180 // and MANIFEST file could not be created in mixed build, they should be removed from
181 // the input paths here.
182 py3wrapper := "/py3wrapper.sh"
183 manifestFile := regexp.MustCompile(".*/.+\\.runfiles/MANIFEST$")
184 filteredInputPaths := []string{}
185 for _, path := range inputPaths {
186 if strings.HasSuffix(path, py3wrapper) || manifestFile.MatchString(path) {
187 continue
188 }
189 filteredInputPaths = append(filteredInputPaths, path)
190 }
191
192 return filteredInputPaths, nil
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400193}
194
195func (a *aqueryArtifactHandler) artifactIdsFromDepsetId(depsetId int) ([]int, error) {
196 if result, exists := a.depsetIdToArtifactIdsCache[depsetId]; exists {
197 return result, nil
198 }
199 if depset, exists := a.depsetIdToDepset[depsetId]; exists {
200 result := depset.DirectArtifactIds
201 for _, childId := range depset.TransitiveDepSetIds {
202 childArtifactIds, err := a.artifactIdsFromDepsetId(childId)
203 if err != nil {
204 return nil, err
205 }
206 result = append(result, childArtifactIds...)
207 }
208 a.depsetIdToArtifactIdsCache[depsetId] = result
209 return result, nil
210 } else {
211 return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
212 }
213}
214
215// AqueryBuildStatements returns an array of BuildStatements which should be registered (and output
216// to a ninja file) to correspond one-to-one with the given action graph json proto (from a bazel
217// aquery invocation).
218func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, error) {
219 buildStatements := []BuildStatement{}
220
221 var aqueryResult actionGraphContainer
222 err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
223 if err != nil {
224 return nil, err
225 }
226 aqueryHandler, err := newAqueryHandler(aqueryResult)
227 if err != nil {
228 return nil, err
229 }
Chris Parsons8d6e4332021-02-22 16:13:50 -0500230
231 for _, actionEntry := range aqueryResult.Actions {
232 if shouldSkipAction(actionEntry) {
233 continue
234 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500235 outputPaths := []string{}
Liz Kammerde116852021-03-25 16:42:37 -0400236 var depfile *string
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500237 for _, outputId := range actionEntry.OutputIds {
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400238 outputPath, exists := aqueryHandler.artifactIdToPath[outputId]
Chris Parsons4f069892021-01-15 12:22:41 -0500239 if !exists {
240 return nil, fmt.Errorf("undefined outputId %d", outputId)
241 }
Liz Kammerde116852021-03-25 16:42:37 -0400242 ext := filepath.Ext(outputPath)
243 if ext == ".d" {
244 if depfile != nil {
245 return nil, fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
246 } else {
247 depfile = &outputPath
248 }
249 } else {
250 outputPaths = append(outputPaths, outputPath)
251 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500252 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400253 inputPaths, err := aqueryHandler.getInputPaths(actionEntry.InputDepSetIds)
254 if err != nil {
255 return nil, err
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500256 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400257
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500258 buildStatement := BuildStatement{
259 Command: strings.Join(proptools.ShellEscapeList(actionEntry.Arguments), " "),
Liz Kammerde116852021-03-25 16:42:37 -0400260 Depfile: depfile,
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500261 OutputPaths: outputPaths,
262 InputPaths: inputPaths,
263 Env: actionEntry.EnvironmentVariables,
Liz Kammerc49e6822021-06-08 15:04:11 -0400264 Mnemonic: actionEntry.Mnemonic,
265 }
266
267 if isSymlinkAction(actionEntry) {
268 if len(inputPaths) != 1 || len(outputPaths) != 1 {
269 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
270 }
271 out := outputPaths[0]
272 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
273 out = proptools.ShellEscapeIncludingSpaces(out)
274 in := proptools.ShellEscapeIncludingSpaces(inputPaths[0])
Chris Parsonsd80b3c82021-10-14 15:24:05 -0400275 // Use hard links, because some soong actions expect real files (for example, `cp -d`).
276 buildStatement.Command = fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -f %[3]s %[2]s", outDir, out, in)
Liz Kammerc49e6822021-06-08 15:04:11 -0400277 buildStatement.SymlinkPaths = outputPaths[:]
Wei Li664a4fd2021-10-07 06:26:48 +0000278 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
279 if len(outputPaths) != 1 {
280 return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
281 }
282 expandedTemplateContent := expandTemplateContent(actionEntry)
283 command := fmt.Sprintf(`echo "%[1]s" | sed "s/\\\\n/\\n/g" >> %[2]s && chmod a+x %[2]s`,
284 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
285 buildStatement.Command = command
286 } else if isPythonZipperAction(actionEntry) {
287 if len(inputPaths) < 1 || len(outputPaths) != 1 {
288 return nil, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
289 }
290 buildStatement.InputPaths, buildStatement.Command = removePy3wrapperScript(buildStatement)
291 buildStatement.Command = addCommandForPyBinaryRunfilesDir(buildStatement, inputPaths[0], outputPaths[0])
292 addPythonZipFileAsDependencyOfPythonBinary(&buildStatements, outputPaths[0])
Liz Kammerc49e6822021-06-08 15:04:11 -0400293 } else if len(actionEntry.Arguments) < 1 {
Liz Kammerde116852021-03-25 16:42:37 -0400294 return nil, fmt.Errorf("received action with no command: [%v]", buildStatement)
Chris Parsons8d6e4332021-02-22 16:13:50 -0500295 }
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500296 buildStatements = append(buildStatements, buildStatement)
297 }
298
Chris Parsons4f069892021-01-15 12:22:41 -0500299 return buildStatements, nil
Chris Parsonsdbcb1ff2020-12-10 17:19:18 -0500300}
Chris Parsonsaffbb602020-12-23 12:02:11 -0500301
Wei Li664a4fd2021-10-07 06:26:48 +0000302// expandTemplateContent substitutes the tokens in a template.
303func expandTemplateContent(actionEntry action) string {
304 replacerString := []string{}
305 for _, pair := range actionEntry.Substitutions {
306 value := pair.Value
307 if val, ok := TemplateActionOverriddenTokens[pair.Key]; ok {
308 value = val
309 }
310 replacerString = append(replacerString, pair.Key, value)
311 }
312 replacer := strings.NewReplacer(replacerString...)
313 return replacer.Replace(actionEntry.TemplateContent)
314}
315
// commandlineArgumentEscaper performs the replacements of escapeCommandlineArgument. It is
// built once at package scope; a strings.Replacer is safe for concurrent use.
var commandlineArgumentEscaper = strings.NewReplacer(
	`\`, `\\`,
	`$`, `\$`,
	"`", "\\`",
	`"`, `\"`,
	"\n", "\\n",
)

// escapeCommandlineArgument escapes str for use inside a double-quoted shell string. It
// prefixes each backslash, dollar sign, backquote and double quote with a backslash, and
// turns each newline into the two characters `\n`:
//
//	\->\\, $->\$, `->\`, "->\", \n->\\n
func escapeCommandlineArgument(str string) string {
	return commandlineArgumentEscaper.Replace(str)
}
327
328// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
329// creating python zip file in mixed build. py3wrapper.sh is returned as input by aquery but
330// there is no action returned by aquery for creating it. So in mixed build "python3" is used
331// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
332// removed from input paths and command of creating python zip file.
333func removePy3wrapperScript(bs BuildStatement) (newInputPaths []string, newCommand string) {
334 // Remove from inputs
335 py3wrapper := "/py3wrapper.sh"
336 filteredInputPaths := []string{}
337 for _, path := range bs.InputPaths {
338 if !strings.HasSuffix(path, py3wrapper) {
339 filteredInputPaths = append(filteredInputPaths, path)
340 }
341 }
342 newInputPaths = filteredInputPaths
343
344 // Remove from command line
345 var re = regexp.MustCompile(`\S*` + py3wrapper)
346 newCommand = re.ReplaceAllString(bs.Command, "")
347 return
348}
349
350// addCommandForPyBinaryRunfilesDir adds commands creating python binary runfiles directory
351// which currently could not be created with aquery output.
352func addCommandForPyBinaryRunfilesDir(bs BuildStatement, zipperCommandPath, zipFilePath string) string {
353 // Unzip the zip file, zipFilePath looks like <python_binary>.zip
354 runfilesDirName := zipFilePath[0:len(zipFilePath)-4] + ".runfiles"
355 command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName)
356 // Create a symblic link in <python_binary>.runfile/, which is the expected structure
357 // when running the python binary stub script.
358 command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
359 return bs.Command + " && " + command
360}
361
362// addPythonZipFileAsDependencyOfPythonBinary adds the action of generating python zip file as dependency of
363// the corresponding action of creating python binary stub script. In mixed build the dependent of python binary depends on
364// the action of createing python binary stub script only, which is not sufficient without the python zip file created.
365func addPythonZipFileAsDependencyOfPythonBinary(buildStatements *[]BuildStatement, pythonZipFilePath string) {
366 for i, _ := range *buildStatements {
367 if len((*buildStatements)[i].OutputPaths) >= 1 && (*buildStatements)[i].OutputPaths[0]+".zip" == pythonZipFilePath {
368 (*buildStatements)[i].InputPaths = append((*buildStatements)[i].InputPaths, pythonZipFilePath)
369 }
370 }
371}
372
Liz Kammerc49e6822021-06-08 15:04:11 -0400373func isSymlinkAction(a action) bool {
374 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink"
375}
376
Wei Li664a4fd2021-10-07 06:26:48 +0000377func isTemplateExpandAction(a action) bool {
378 return a.Mnemonic == "TemplateExpand"
379}
380
381func isPythonZipperAction(a action) bool {
382 return a.Mnemonic == "PythonZipper"
383}
384
Chris Parsons8d6e4332021-02-22 16:13:50 -0500385func shouldSkipAction(a action) bool {
Liz Kammerc49e6822021-06-08 15:04:11 -0400386 // TODO(b/180945121): Handle complex symlink actions.
387 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
Chris Parsons8d6e4332021-02-22 16:13:50 -0500388 return true
389 }
Chris Parsonsc4fb1332021-05-18 12:31:25 -0400390 // Middleman actions are not handled like other actions; they are handled separately as a
391 // preparatory step so that their inputs may be relayed to actions depending on middleman
392 // artifacts.
Chris Parsons8d6e4332021-02-22 16:13:50 -0500393 if a.Mnemonic == "Middleman" {
394 return true
395 }
396 // Skip "Fail" actions, which are placeholder actions designed to always fail.
397 if a.Mnemonic == "Fail" {
398 return true
399 }
400 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
401 // about the contents that are written.
402 if a.Mnemonic == "FileWrite" {
403 return true
404 }
405 return false
406}
407
Chris Parsonsaffbb602020-12-23 12:02:11 -0500408func expandPathFragment(id int, pathFragmentsMap map[int]pathFragment) (string, error) {
409 labels := []string{}
410 currId := id
411 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
412 for currId > 0 {
413 currFragment, ok := pathFragmentsMap[currId]
414 if !ok {
Chris Parsons4f069892021-01-15 12:22:41 -0500415 return "", fmt.Errorf("undefined path fragment id %d", currId)
Chris Parsonsaffbb602020-12-23 12:02:11 -0500416 }
417 labels = append([]string{currFragment.Label}, labels...)
Liz Kammerc49e6822021-06-08 15:04:11 -0400418 if currId == currFragment.ParentId {
419 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
420 }
Chris Parsonsaffbb602020-12-23 12:02:11 -0500421 currId = currFragment.ParentId
422 }
423 return filepath.Join(labels...), nil
424}