blob: 58b1c3b434f4e861a60a9003be84d74c93cd55a1 [file] [log] [blame]
Bob Badoure6fdd142021-12-09 22:10:43 -08001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "bufio"
19 "crypto/md5"
20 "fmt"
21 "io"
22 "io/fs"
23 "path/filepath"
24 "regexp"
25 "sort"
26 "strings"
27)
28
29const (
30 noProjectName = "\u2205"
31)
32
33var (
34 nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
35 descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
36 versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
37 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
38)
39
40// NoticeIndex transforms license metadata into license text hashes, library
41// names, and install paths indexing them for fast lookup/iteration.
42type NoticeIndex struct {
43 // lg identifies the license graph to which the index applies.
44 lg *LicenseGraph
45 // rs identifies the set of resolutions upon which the index is based.
46 rs ResolutionSet
47 // shipped identifies the set of target nodes shipped directly or as derivative works.
48 shipped *TargetNodeSet
49 // rootFS locates the root of the file system from which to read the files.
50 rootFS fs.FS
51 // hash maps license text filenames to content hashes
52 hash map[string]hash
53 // text maps content hashes to content
54 text map[hash][]byte
55 // hashLibInstall maps hashes to libraries to install paths.
56 hashLibInstall map[hash]map[string]map[string]struct{}
Bob Badour6ea14572022-01-23 17:15:46 -080057 // installHashLib maps install paths to libraries to hashes.
58 installHashLib map[string]map[hash]map[string]struct{}
Bob Badoure6fdd142021-12-09 22:10:43 -080059 // libHash maps libraries to hashes.
60 libHash map[string]map[hash]struct{}
61 // targetHash maps target nodes to hashes.
62 targetHashes map[*TargetNode]map[hash]struct{}
63 // projectName maps project directory names to project name text.
64 projectName map[string]string
65}
66
67// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
68// using the files rooted at `rootFS`.
69func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
70 if rs == nil {
71 rs = ResolveNotices(lg)
72 }
73 ni := &NoticeIndex{
74 lg, rs, ShippedNodes(lg), rootFS,
75 make(map[string]hash),
76 make(map[hash][]byte),
77 make(map[hash]map[string]map[string]struct{}),
Bob Badour6ea14572022-01-23 17:15:46 -080078 make(map[string]map[hash]map[string]struct{}),
Bob Badoure6fdd142021-12-09 22:10:43 -080079 make(map[string]map[hash]struct{}),
80 make(map[*TargetNode]map[hash]struct{}),
81 make(map[string]string),
82 }
83
84 // index adds all license texts for `tn` to the index.
85 index := func(tn *TargetNode) (map[hash]struct{}, error) {
86 if hashes, ok := ni.targetHashes[tn]; ok {
87 return hashes, nil
88 }
89 hashes := make(map[hash]struct{})
90 for _, text := range tn.LicenseTexts() {
91 if _, ok := ni.hash[text]; !ok {
92 err := ni.addText(text)
93 if err != nil {
94 return nil, err
95 }
96 }
97 hash := ni.hash[text]
98 if _, ok := hashes[hash]; !ok {
99 hashes[hash] = struct{}{}
100 }
101 }
102 ni.targetHashes[tn] = hashes
103 return hashes, nil
104 }
105
106 link := func(libName string, hashes map[hash]struct{}, installPaths []string) {
107 if _, ok := ni.libHash[libName]; !ok {
108 ni.libHash[libName] = make(map[hash]struct{})
109 }
110 for h := range hashes {
111 if _, ok := ni.hashLibInstall[h]; !ok {
112 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
113 }
114 if _, ok := ni.libHash[libName][h]; !ok {
115 ni.libHash[libName][h] = struct{}{}
116 }
117 for _, installPath := range installPaths {
Bob Badour6ea14572022-01-23 17:15:46 -0800118 if _, ok := ni.installHashLib[installPath]; !ok {
119 ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
120 ni.installHashLib[installPath][h] = make(map[string]struct{})
121 ni.installHashLib[installPath][h][libName] = struct{}{}
122 } else if _, ok = ni.installHashLib[installPath][h]; !ok {
123 ni.installHashLib[installPath][h] = make(map[string]struct{})
124 ni.installHashLib[installPath][h][libName] = struct{}{}
125 } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
126 ni.installHashLib[installPath][h][libName] = struct{}{}
Bob Badoure6fdd142021-12-09 22:10:43 -0800127 }
128 if _, ok := ni.hashLibInstall[h]; !ok {
129 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
130 ni.hashLibInstall[h][libName] = make(map[string]struct{})
131 ni.hashLibInstall[h][libName][installPath] = struct{}{}
132 } else if _, ok = ni.hashLibInstall[h][libName]; !ok {
133 ni.hashLibInstall[h][libName] = make(map[string]struct{})
134 ni.hashLibInstall[h][libName][installPath] = struct{}{}
135 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
136 ni.hashLibInstall[h][libName][installPath] = struct{}{}
137 }
138 }
139 }
140 }
141
142 // returns error from walk below.
143 var err error
144
145 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
146 if err != nil {
147 return false
148 }
149 if !ni.shipped.Contains(tn) {
150 return false
151 }
152 installPaths := getInstallPaths(tn, path)
153 var hashes map[hash]struct{}
154 hashes, err = index(tn)
155 if err != nil {
156 return false
157 }
158 link(ni.getLibName(tn), hashes, installPaths)
159 if tn.IsContainer() {
160 return true
161 }
162
163 for _, r := range rs.Resolutions(tn) {
164 hashes, err = index(r.actsOn)
165 if err != nil {
166 return false
167 }
168 link(ni.getLibName(r.actsOn), hashes, installPaths)
169 }
170 return false
171 })
172
173 if err != nil {
174 return nil, err
175 }
176
177 return ni, nil
178}
179
180// Hashes returns an ordered channel of the hashed license texts.
181func (ni *NoticeIndex) Hashes() chan hash {
182 c := make(chan hash)
183 go func() {
184 libs := make([]string, 0, len(ni.libHash))
185 for libName := range ni.libHash {
186 libs = append(libs, libName)
187 }
188 sort.Strings(libs)
189 hashes := make(map[hash]struct{})
190 for _, libName := range libs {
191 hl := make([]hash, 0, len(ni.libHash[libName]))
192 for h := range ni.libHash[libName] {
193 if _, ok := hashes[h]; ok {
194 continue
195 }
196 hashes[h] = struct{}{}
197 hl = append(hl, h)
198 }
199 if len(hl) > 0 {
Bob Badour6ea14572022-01-23 17:15:46 -0800200 sort.Sort(hashList{ni, libName, "", &hl})
Bob Badoure6fdd142021-12-09 22:10:43 -0800201 for _, h := range hl {
202 c <- h
203 }
204 }
205 }
206 close(c)
207 }()
208 return c
209}
210
211// HashLibs returns the ordered array of library names using the license text
212// hashed as `h`.
213func (ni *NoticeIndex) HashLibs(h hash) []string {
214 libs := make([]string, 0, len(ni.hashLibInstall[h]))
215 for libName := range ni.hashLibInstall[h] {
216 libs = append(libs, libName)
217 }
218 sort.Strings(libs)
219 return libs
220}
221
222// HashLibInstalls returns the ordered array of install paths referencing
223// library `libName` using the license text hashed as `h`.
224func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
225 installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
226 for installPath := range ni.hashLibInstall[h][libName] {
227 installs = append(installs, installPath)
228 }
229 sort.Strings(installs)
230 return installs
231}
232
Bob Badour6ea14572022-01-23 17:15:46 -0800233// InstallPaths returns the ordered channel of indexed install paths.
234func (ni *NoticeIndex) InstallPaths() chan string {
235 c := make(chan string)
236 go func() {
237 paths := make([]string, 0, len(ni.installHashLib))
238 for path := range ni.installHashLib {
239 paths = append(paths, path)
240 }
241 sort.Strings(paths)
242 for _, installPath := range paths {
243 c <- installPath
244 }
245 close(c)
246 }()
247 return c
248}
249
250// InstallHashes returns the ordered array of hashes attached to `installPath`.
251func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
252 result := make([]hash, 0, len(ni.installHashLib[installPath]))
253 for h := range ni.installHashLib[installPath] {
254 result = append(result, h)
255 }
256 if len(result) > 0 {
257 sort.Sort(hashList{ni, "", installPath, &result})
258 }
259 return result
260}
261
262// InstallHashLibs returns the ordered array of library names attached to
263// `installPath` as hash `h`.
264func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
265 result := make([]string, 0, len(ni.installHashLib[installPath][h]))
266 for libName := range ni.installHashLib[installPath][h] {
267 result = append(result, libName)
268 }
269 sort.Strings(result)
270 return result
271}
272
Bob Badour00c8a382022-01-26 17:21:39 -0800273// Libraries returns the ordered channel of indexed library names.
274func (ni *NoticeIndex) Libraries() chan string {
275 c := make(chan string)
276 go func() {
277 libs := make([]string, 0, len(ni.libHash))
278 for lib := range ni.libHash {
279 libs = append(libs, lib)
280 }
281 sort.Strings(libs)
282 for _, lib := range libs {
283 c <- lib
284 }
285 close(c)
286 }()
287 return c
288}
289
Bob Badoure6fdd142021-12-09 22:10:43 -0800290// HashText returns the file content of the license text hashed as `h`.
291func (ni *NoticeIndex) HashText(h hash) []byte {
292 return ni.text[h]
293}
294
295// getLibName returns the name of the library associated with `noticeFor`.
296func (ni *NoticeIndex) getLibName(noticeFor *TargetNode) string {
297 // use name from METADATA if available
298 ln := ni.checkMetadata(noticeFor)
299 if len(ln) > 0 {
300 return ln
301 }
302 // use package_name: from license{} module if available
303 pn := noticeFor.PackageName()
304 if len(pn) > 0 {
305 return pn
306 }
307 for _, p := range noticeFor.Projects() {
308 if strings.HasPrefix(p, "prebuilts/") {
309 for _, licenseText := range noticeFor.LicenseTexts() {
310 if !strings.HasPrefix(licenseText, "prebuilts/") {
311 continue
312 }
313 for r, prefix := range SafePrebuiltPrefixes {
314 match := r.FindString(licenseText)
315 if len(match) == 0 {
316 continue
317 }
318 strip := SafePathPrefixes[prefix]
319 if strip {
320 // strip entire prefix
321 match = licenseText[len(match):]
322 } else {
323 // strip from prebuilts/ until safe prefix
324 match = licenseText[len(match)-len(prefix):]
325 }
326 // remove LICENSE or NOTICE or other filename
327 li := strings.LastIndex(match, "/")
328 if 0 < li {
329 match = match[:li]
330 }
331 // remove *licenses/ path segment and subdirectory if in path
332 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && 0 < offsets[len(offsets)-1][0] {
333 match = match[:offsets[len(offsets)-1][0]]
334 li = strings.LastIndex(match, "/")
335 if 0 < li {
336 match = match[:li]
337 }
338 }
339 return match
340 }
341 break
342 }
343 }
344 for prefix, strip := range SafePathPrefixes {
345 if strings.HasPrefix(p, prefix) {
346 if strip {
347 return p[len(prefix):]
348 } else {
349 return p
350 }
351 }
352 }
353 }
354 // strip off [./]meta_lic from license metadata path and extract base name
355 n := noticeFor.name[:len(noticeFor.name)-9]
356 li := strings.LastIndex(n, "/")
357 if 0 < li {
358 n = n[li+1:]
359 }
360 return n
361}
362
363// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
364func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
365 for _, p := range noticeFor.Projects() {
366 if name, ok := ni.projectName[p]; ok {
367 if name == noProjectName {
368 continue
369 }
370 return name
371 }
372 f, err := ni.rootFS.Open(filepath.Join(p, "METADATA"))
373 if err != nil {
374 ni.projectName[p] = noProjectName
375 continue
376 }
377 name := ""
378 description := ""
379 version := ""
380 s := bufio.NewScanner(f)
381 for s.Scan() {
382 line := s.Text()
383 m := nameRegexp.FindStringSubmatch(line)
384 if m != nil {
385 if 1 < len(m) && m[1] != "" {
386 name = m[1]
387 }
388 if version != "" {
389 break
390 }
391 continue
392 }
393 m = versionRegexp.FindStringSubmatch(line)
394 if m != nil {
395 if 1 < len(m) && m[1] != "" {
396 version = m[1]
397 }
398 if name != "" {
399 break
400 }
401 continue
402 }
403 m = descRegexp.FindStringSubmatch(line)
404 if m != nil {
405 if 1 < len(m) && m[1] != "" {
406 description = m[1]
407 }
408 }
409 }
410 _ = s.Err()
411 _ = f.Close()
412 if name != "" {
413 if version != "" {
414 if version[0] == 'v' || version[0] == 'V' {
415 ni.projectName[p] = name + "_" + version
416 } else {
417 ni.projectName[p] = name + "_v_" + version
418 }
419 } else {
420 ni.projectName[p] = name
421 }
422 return ni.projectName[p]
423 }
424 if description != "" {
425 ni.projectName[p] = description
426 return ni.projectName[p]
427 }
428 ni.projectName[p] = noProjectName
429 }
430 return ""
431}
432
433// addText reads and indexes the content of a license text file.
434func (ni *NoticeIndex) addText(file string) error {
435 f, err := ni.rootFS.Open(filepath.Clean(file))
436 if err != nil {
437 return fmt.Errorf("error opening license text file %q: %w", file, err)
438 }
439
440 // read the file
441 text, err := io.ReadAll(f)
442 if err != nil {
443 return fmt.Errorf("error reading license text file %q: %w", file, err)
444 }
445
446 hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
447 ni.hash[file] = hash
448 if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
449 ni.text[hash] = text
450 }
451
452 return nil
453}
454
455// getInstallPaths returns the names of the used dependencies mapped to their
456// installed locations.
457func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
458 if len(path) == 0 {
459 installs := attachesTo.Installed()
460 if 0 == len(installs) {
461 installs = attachesTo.Built()
462 }
463 return installs
464 }
465
466 var getInstalls func(path TargetEdgePath) []string
467
468 getInstalls = func(path TargetEdgePath) []string {
469 // deps contains the output targets from the dependencies in the path
470 var deps []string
471 if len(path) > 1 {
472 // recursively get the targets from the sub-path skipping 1 path segment
473 deps = getInstalls(path[1:])
474 } else {
475 // stop recursion at 1 path segment
476 deps = path[0].Dependency().TargetFiles()
477 }
478 size := 0
479 prefixes := path[0].Target().TargetFiles()
480 installMap := path[0].Target().InstallMap()
481 sources := path[0].Target().Sources()
482 for _, dep := range deps {
483 found := false
484 for _, source := range sources {
485 if strings.HasPrefix(dep, source) {
486 found = true
487 break
488 }
489 }
490 if !found {
491 continue
492 }
493 for _, im := range installMap {
494 if strings.HasPrefix(dep, im.FromPath) {
495 size += len(prefixes)
496 break
497 }
498 }
499 }
500
501 installs := make([]string, 0, size)
502 for _, dep := range deps {
503 found := false
504 for _, source := range sources {
505 if strings.HasPrefix(dep, source) {
506 found = true
507 break
508 }
509 }
510 if !found {
511 continue
512 }
513 for _, im := range installMap {
514 if strings.HasPrefix(dep, im.FromPath) {
515 for _, prefix := range prefixes {
516 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
517 }
518 break
519 }
520 }
521 }
522 return installs
523 }
524 allInstalls := getInstalls(path)
525 installs := path[0].Target().Installed()
526 if len(installs) == 0 {
527 return allInstalls
528 }
529 result := make([]string, 0, len(allInstalls))
530 for _, install := range allInstalls {
531 for _, prefix := range installs {
532 if strings.HasPrefix(install, prefix) {
533 result = append(result, install)
534 }
535 }
536 }
537 return result
538}
539
540// hash is an opaque string derived from md5sum.
541type hash struct {
542 key string
543}
544
545// String returns the hexadecimal representation of the hash.
546func (h hash) String() string {
547 return h.key
548}
549
550// hashList orders an array of hashes
551type hashList struct {
Bob Badour6ea14572022-01-23 17:15:46 -0800552 ni *NoticeIndex
553 libName string
554 installPath string
555 hashes *[]hash
Bob Badoure6fdd142021-12-09 22:10:43 -0800556}
557
558// Len returns the count of elements in the slice.
559func (l hashList) Len() int { return len(*l.hashes) }
560
561// Swap rearranges 2 elements of the slice so that each occupies the other's
562// former position.
563func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
564
565// Less returns true when the `i`th element is lexicographically less than
566// the `j`th element.
567func (l hashList) Less(i, j int) bool {
568 var insti, instj int
569 if 0 < len(l.libName) {
570 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
571 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
Bob Badour6ea14572022-01-23 17:15:46 -0800572 } else {
573 libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
574 libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
575 libsis := strings.Join(libsi, " ")
576 libsjs := strings.Join(libsj, " ")
577 if libsis != libsjs {
578 return libsis < libsjs
579 }
Bob Badoure6fdd142021-12-09 22:10:43 -0800580 }
581 if insti == instj {
582 leni := len(l.ni.text[(*l.hashes)[i]])
583 lenj := len(l.ni.text[(*l.hashes)[j]])
584 if leni == lenj {
585 // all else equal, just order by hash value
586 return (*l.hashes)[i].key < (*l.hashes)[j].key
587 }
588 // put shortest texts first within same # of installs
589 return leni < lenj
590 }
591 // reverse order of # installs so that most popular appears first
592 return instj < insti
593}