blob: 06c2627d15ab717ecca50f084bf5fff54d2f4c61 [file] [log] [blame]
Bob Badoure6fdd142021-12-09 22:10:43 -08001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "bufio"
19 "crypto/md5"
20 "fmt"
21 "io"
22 "io/fs"
23 "path/filepath"
24 "regexp"
25 "sort"
26 "strings"
27)
28
29const (
30 noProjectName = "\u2205"
31)
32
33var (
34 nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
35 descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
36 versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
37 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
38)
39
40// NoticeIndex transforms license metadata into license text hashes, library
41// names, and install paths indexing them for fast lookup/iteration.
42type NoticeIndex struct {
43 // lg identifies the license graph to which the index applies.
44 lg *LicenseGraph
45 // rs identifies the set of resolutions upon which the index is based.
46 rs ResolutionSet
47 // shipped identifies the set of target nodes shipped directly or as derivative works.
48 shipped *TargetNodeSet
49 // rootFS locates the root of the file system from which to read the files.
50 rootFS fs.FS
51 // hash maps license text filenames to content hashes
52 hash map[string]hash
53 // text maps content hashes to content
54 text map[hash][]byte
55 // hashLibInstall maps hashes to libraries to install paths.
56 hashLibInstall map[hash]map[string]map[string]struct{}
57 // installLibHash maps install paths to libraries to hashes.
58 installLibHash map[string]map[string]map[hash]struct{}
59 // libHash maps libraries to hashes.
60 libHash map[string]map[hash]struct{}
61 // targetHash maps target nodes to hashes.
62 targetHashes map[*TargetNode]map[hash]struct{}
63 // projectName maps project directory names to project name text.
64 projectName map[string]string
65}
66
67// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
68// using the files rooted at `rootFS`.
69func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
70 if rs == nil {
71 rs = ResolveNotices(lg)
72 }
73 ni := &NoticeIndex{
74 lg, rs, ShippedNodes(lg), rootFS,
75 make(map[string]hash),
76 make(map[hash][]byte),
77 make(map[hash]map[string]map[string]struct{}),
78 make(map[string]map[string]map[hash]struct{}),
79 make(map[string]map[hash]struct{}),
80 make(map[*TargetNode]map[hash]struct{}),
81 make(map[string]string),
82 }
83
84 // index adds all license texts for `tn` to the index.
85 index := func(tn *TargetNode) (map[hash]struct{}, error) {
86 if hashes, ok := ni.targetHashes[tn]; ok {
87 return hashes, nil
88 }
89 hashes := make(map[hash]struct{})
90 for _, text := range tn.LicenseTexts() {
91 if _, ok := ni.hash[text]; !ok {
92 err := ni.addText(text)
93 if err != nil {
94 return nil, err
95 }
96 }
97 hash := ni.hash[text]
98 if _, ok := hashes[hash]; !ok {
99 hashes[hash] = struct{}{}
100 }
101 }
102 ni.targetHashes[tn] = hashes
103 return hashes, nil
104 }
105
106 link := func(libName string, hashes map[hash]struct{}, installPaths []string) {
107 if _, ok := ni.libHash[libName]; !ok {
108 ni.libHash[libName] = make(map[hash]struct{})
109 }
110 for h := range hashes {
111 if _, ok := ni.hashLibInstall[h]; !ok {
112 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
113 }
114 if _, ok := ni.libHash[libName][h]; !ok {
115 ni.libHash[libName][h] = struct{}{}
116 }
117 for _, installPath := range installPaths {
118 if _, ok := ni.installLibHash[installPath]; !ok {
119 ni.installLibHash[installPath] = make(map[string]map[hash]struct{})
120 ni.installLibHash[installPath][libName] = make(map[hash]struct{})
121 ni.installLibHash[installPath][libName][h] = struct{}{}
122 } else if _, ok = ni.installLibHash[installPath][libName]; !ok {
123 ni.installLibHash[installPath][libName] = make(map[hash]struct{})
124 ni.installLibHash[installPath][libName][h] = struct{}{}
125 } else if _, ok = ni.installLibHash[installPath][libName][h]; !ok {
126 ni.installLibHash[installPath][libName][h] = struct{}{}
127 }
128 if _, ok := ni.hashLibInstall[h]; !ok {
129 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
130 ni.hashLibInstall[h][libName] = make(map[string]struct{})
131 ni.hashLibInstall[h][libName][installPath] = struct{}{}
132 } else if _, ok = ni.hashLibInstall[h][libName]; !ok {
133 ni.hashLibInstall[h][libName] = make(map[string]struct{})
134 ni.hashLibInstall[h][libName][installPath] = struct{}{}
135 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
136 ni.hashLibInstall[h][libName][installPath] = struct{}{}
137 }
138 }
139 }
140 }
141
142 // returns error from walk below.
143 var err error
144
145 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
146 if err != nil {
147 return false
148 }
149 if !ni.shipped.Contains(tn) {
150 return false
151 }
152 installPaths := getInstallPaths(tn, path)
153 var hashes map[hash]struct{}
154 hashes, err = index(tn)
155 if err != nil {
156 return false
157 }
158 link(ni.getLibName(tn), hashes, installPaths)
159 if tn.IsContainer() {
160 return true
161 }
162
163 for _, r := range rs.Resolutions(tn) {
164 hashes, err = index(r.actsOn)
165 if err != nil {
166 return false
167 }
168 link(ni.getLibName(r.actsOn), hashes, installPaths)
169 }
170 return false
171 })
172
173 if err != nil {
174 return nil, err
175 }
176
177 return ni, nil
178}
179
180// Hashes returns an ordered channel of the hashed license texts.
181func (ni *NoticeIndex) Hashes() chan hash {
182 c := make(chan hash)
183 go func() {
184 libs := make([]string, 0, len(ni.libHash))
185 for libName := range ni.libHash {
186 libs = append(libs, libName)
187 }
188 sort.Strings(libs)
189 hashes := make(map[hash]struct{})
190 for _, libName := range libs {
191 hl := make([]hash, 0, len(ni.libHash[libName]))
192 for h := range ni.libHash[libName] {
193 if _, ok := hashes[h]; ok {
194 continue
195 }
196 hashes[h] = struct{}{}
197 hl = append(hl, h)
198 }
199 if len(hl) > 0 {
200 sort.Sort(hashList{ni, libName, &hl})
201 for _, h := range hl {
202 c <- h
203 }
204 }
205 }
206 close(c)
207 }()
208 return c
209}
210
211// HashLibs returns the ordered array of library names using the license text
212// hashed as `h`.
213func (ni *NoticeIndex) HashLibs(h hash) []string {
214 libs := make([]string, 0, len(ni.hashLibInstall[h]))
215 for libName := range ni.hashLibInstall[h] {
216 libs = append(libs, libName)
217 }
218 sort.Strings(libs)
219 return libs
220}
221
222// HashLibInstalls returns the ordered array of install paths referencing
223// library `libName` using the license text hashed as `h`.
224func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
225 installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
226 for installPath := range ni.hashLibInstall[h][libName] {
227 installs = append(installs, installPath)
228 }
229 sort.Strings(installs)
230 return installs
231}
232
233// HashText returns the file content of the license text hashed as `h`.
234func (ni *NoticeIndex) HashText(h hash) []byte {
235 return ni.text[h]
236}
237
238// getLibName returns the name of the library associated with `noticeFor`.
239func (ni *NoticeIndex) getLibName(noticeFor *TargetNode) string {
240 // use name from METADATA if available
241 ln := ni.checkMetadata(noticeFor)
242 if len(ln) > 0 {
243 return ln
244 }
245 // use package_name: from license{} module if available
246 pn := noticeFor.PackageName()
247 if len(pn) > 0 {
248 return pn
249 }
250 for _, p := range noticeFor.Projects() {
251 if strings.HasPrefix(p, "prebuilts/") {
252 for _, licenseText := range noticeFor.LicenseTexts() {
253 if !strings.HasPrefix(licenseText, "prebuilts/") {
254 continue
255 }
256 for r, prefix := range SafePrebuiltPrefixes {
257 match := r.FindString(licenseText)
258 if len(match) == 0 {
259 continue
260 }
261 strip := SafePathPrefixes[prefix]
262 if strip {
263 // strip entire prefix
264 match = licenseText[len(match):]
265 } else {
266 // strip from prebuilts/ until safe prefix
267 match = licenseText[len(match)-len(prefix):]
268 }
269 // remove LICENSE or NOTICE or other filename
270 li := strings.LastIndex(match, "/")
271 if 0 < li {
272 match = match[:li]
273 }
274 // remove *licenses/ path segment and subdirectory if in path
275 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && 0 < offsets[len(offsets)-1][0] {
276 match = match[:offsets[len(offsets)-1][0]]
277 li = strings.LastIndex(match, "/")
278 if 0 < li {
279 match = match[:li]
280 }
281 }
282 return match
283 }
284 break
285 }
286 }
287 for prefix, strip := range SafePathPrefixes {
288 if strings.HasPrefix(p, prefix) {
289 if strip {
290 return p[len(prefix):]
291 } else {
292 return p
293 }
294 }
295 }
296 }
297 // strip off [./]meta_lic from license metadata path and extract base name
298 n := noticeFor.name[:len(noticeFor.name)-9]
299 li := strings.LastIndex(n, "/")
300 if 0 < li {
301 n = n[li+1:]
302 }
303 return n
304}
305
306// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
307func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
308 for _, p := range noticeFor.Projects() {
309 if name, ok := ni.projectName[p]; ok {
310 if name == noProjectName {
311 continue
312 }
313 return name
314 }
315 f, err := ni.rootFS.Open(filepath.Join(p, "METADATA"))
316 if err != nil {
317 ni.projectName[p] = noProjectName
318 continue
319 }
320 name := ""
321 description := ""
322 version := ""
323 s := bufio.NewScanner(f)
324 for s.Scan() {
325 line := s.Text()
326 m := nameRegexp.FindStringSubmatch(line)
327 if m != nil {
328 if 1 < len(m) && m[1] != "" {
329 name = m[1]
330 }
331 if version != "" {
332 break
333 }
334 continue
335 }
336 m = versionRegexp.FindStringSubmatch(line)
337 if m != nil {
338 if 1 < len(m) && m[1] != "" {
339 version = m[1]
340 }
341 if name != "" {
342 break
343 }
344 continue
345 }
346 m = descRegexp.FindStringSubmatch(line)
347 if m != nil {
348 if 1 < len(m) && m[1] != "" {
349 description = m[1]
350 }
351 }
352 }
353 _ = s.Err()
354 _ = f.Close()
355 if name != "" {
356 if version != "" {
357 if version[0] == 'v' || version[0] == 'V' {
358 ni.projectName[p] = name + "_" + version
359 } else {
360 ni.projectName[p] = name + "_v_" + version
361 }
362 } else {
363 ni.projectName[p] = name
364 }
365 return ni.projectName[p]
366 }
367 if description != "" {
368 ni.projectName[p] = description
369 return ni.projectName[p]
370 }
371 ni.projectName[p] = noProjectName
372 }
373 return ""
374}
375
376// addText reads and indexes the content of a license text file.
377func (ni *NoticeIndex) addText(file string) error {
378 f, err := ni.rootFS.Open(filepath.Clean(file))
379 if err != nil {
380 return fmt.Errorf("error opening license text file %q: %w", file, err)
381 }
382
383 // read the file
384 text, err := io.ReadAll(f)
385 if err != nil {
386 return fmt.Errorf("error reading license text file %q: %w", file, err)
387 }
388
389 hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
390 ni.hash[file] = hash
391 if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
392 ni.text[hash] = text
393 }
394
395 return nil
396}
397
398// getInstallPaths returns the names of the used dependencies mapped to their
399// installed locations.
400func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
401 if len(path) == 0 {
402 installs := attachesTo.Installed()
403 if 0 == len(installs) {
404 installs = attachesTo.Built()
405 }
406 return installs
407 }
408
409 var getInstalls func(path TargetEdgePath) []string
410
411 getInstalls = func(path TargetEdgePath) []string {
412 // deps contains the output targets from the dependencies in the path
413 var deps []string
414 if len(path) > 1 {
415 // recursively get the targets from the sub-path skipping 1 path segment
416 deps = getInstalls(path[1:])
417 } else {
418 // stop recursion at 1 path segment
419 deps = path[0].Dependency().TargetFiles()
420 }
421 size := 0
422 prefixes := path[0].Target().TargetFiles()
423 installMap := path[0].Target().InstallMap()
424 sources := path[0].Target().Sources()
425 for _, dep := range deps {
426 found := false
427 for _, source := range sources {
428 if strings.HasPrefix(dep, source) {
429 found = true
430 break
431 }
432 }
433 if !found {
434 continue
435 }
436 for _, im := range installMap {
437 if strings.HasPrefix(dep, im.FromPath) {
438 size += len(prefixes)
439 break
440 }
441 }
442 }
443
444 installs := make([]string, 0, size)
445 for _, dep := range deps {
446 found := false
447 for _, source := range sources {
448 if strings.HasPrefix(dep, source) {
449 found = true
450 break
451 }
452 }
453 if !found {
454 continue
455 }
456 for _, im := range installMap {
457 if strings.HasPrefix(dep, im.FromPath) {
458 for _, prefix := range prefixes {
459 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
460 }
461 break
462 }
463 }
464 }
465 return installs
466 }
467 allInstalls := getInstalls(path)
468 installs := path[0].Target().Installed()
469 if len(installs) == 0 {
470 return allInstalls
471 }
472 result := make([]string, 0, len(allInstalls))
473 for _, install := range allInstalls {
474 for _, prefix := range installs {
475 if strings.HasPrefix(install, prefix) {
476 result = append(result, install)
477 }
478 }
479 }
480 return result
481}
482
483// hash is an opaque string derived from md5sum.
484type hash struct {
485 key string
486}
487
488// String returns the hexadecimal representation of the hash.
489func (h hash) String() string {
490 return h.key
491}
492
493// hashList orders an array of hashes
494type hashList struct {
495 ni *NoticeIndex
496 libName string
497 hashes *[]hash
498}
499
500// Len returns the count of elements in the slice.
501func (l hashList) Len() int { return len(*l.hashes) }
502
503// Swap rearranges 2 elements of the slice so that each occupies the other's
504// former position.
505func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
506
507// Less returns true when the `i`th element is lexicographically less than
508// the `j`th element.
509func (l hashList) Less(i, j int) bool {
510 var insti, instj int
511 if 0 < len(l.libName) {
512 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
513 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
514 }
515 if insti == instj {
516 leni := len(l.ni.text[(*l.hashes)[i]])
517 lenj := len(l.ni.text[(*l.hashes)[j]])
518 if leni == lenj {
519 // all else equal, just order by hash value
520 return (*l.hashes)[i].key < (*l.hashes)[j].key
521 }
522 // put shortest texts first within same # of installs
523 return leni < lenj
524 }
525 // reverse order of # installs so that most popular appears first
526 return instj < insti
527}