blob: 7bebe3d12429e25ee4b9f50401e34302760ac418 [file] [log] [blame]
Bob Badoure6fdd142021-12-09 22:10:43 -08001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "bufio"
19 "crypto/md5"
20 "fmt"
21 "io"
22 "io/fs"
23 "path/filepath"
24 "regexp"
25 "sort"
26 "strings"
27)
28
29const (
30 noProjectName = "\u2205"
31)
32
33var (
34 nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
35 descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
36 versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
37 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
38)
39
40// NoticeIndex transforms license metadata into license text hashes, library
41// names, and install paths indexing them for fast lookup/iteration.
42type NoticeIndex struct {
43 // lg identifies the license graph to which the index applies.
44 lg *LicenseGraph
45 // rs identifies the set of resolutions upon which the index is based.
46 rs ResolutionSet
47 // shipped identifies the set of target nodes shipped directly or as derivative works.
48 shipped *TargetNodeSet
49 // rootFS locates the root of the file system from which to read the files.
50 rootFS fs.FS
51 // hash maps license text filenames to content hashes
52 hash map[string]hash
53 // text maps content hashes to content
54 text map[hash][]byte
55 // hashLibInstall maps hashes to libraries to install paths.
56 hashLibInstall map[hash]map[string]map[string]struct{}
Bob Badour6ea14572022-01-23 17:15:46 -080057 // installHashLib maps install paths to libraries to hashes.
58 installHashLib map[string]map[hash]map[string]struct{}
Bob Badoure6fdd142021-12-09 22:10:43 -080059 // libHash maps libraries to hashes.
60 libHash map[string]map[hash]struct{}
61 // targetHash maps target nodes to hashes.
62 targetHashes map[*TargetNode]map[hash]struct{}
63 // projectName maps project directory names to project name text.
64 projectName map[string]string
Colin Crossbb45f8c2022-01-28 15:18:19 -080065 // files lists all the files accessed during indexing
66 files []string
Bob Badoure6fdd142021-12-09 22:10:43 -080067}
68
69// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
70// using the files rooted at `rootFS`.
71func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
72 if rs == nil {
73 rs = ResolveNotices(lg)
74 }
75 ni := &NoticeIndex{
Colin Crossbb45f8c2022-01-28 15:18:19 -080076 lg: lg,
77 rs: rs,
78 shipped: ShippedNodes(lg),
79 rootFS: rootFS,
80 hash: make(map[string]hash),
81 text: make(map[hash][]byte),
82 hashLibInstall: make(map[hash]map[string]map[string]struct{}),
83 installHashLib: make(map[string]map[hash]map[string]struct{}),
84 libHash: make(map[string]map[hash]struct{}),
85 targetHashes: make(map[*TargetNode]map[hash]struct{}),
86 projectName: make(map[string]string),
Bob Badoure6fdd142021-12-09 22:10:43 -080087 }
88
89 // index adds all license texts for `tn` to the index.
90 index := func(tn *TargetNode) (map[hash]struct{}, error) {
91 if hashes, ok := ni.targetHashes[tn]; ok {
92 return hashes, nil
93 }
94 hashes := make(map[hash]struct{})
95 for _, text := range tn.LicenseTexts() {
96 if _, ok := ni.hash[text]; !ok {
97 err := ni.addText(text)
98 if err != nil {
99 return nil, err
100 }
101 }
102 hash := ni.hash[text]
103 if _, ok := hashes[hash]; !ok {
104 hashes[hash] = struct{}{}
105 }
106 }
107 ni.targetHashes[tn] = hashes
108 return hashes, nil
109 }
110
111 link := func(libName string, hashes map[hash]struct{}, installPaths []string) {
112 if _, ok := ni.libHash[libName]; !ok {
113 ni.libHash[libName] = make(map[hash]struct{})
114 }
115 for h := range hashes {
116 if _, ok := ni.hashLibInstall[h]; !ok {
117 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
118 }
119 if _, ok := ni.libHash[libName][h]; !ok {
120 ni.libHash[libName][h] = struct{}{}
121 }
122 for _, installPath := range installPaths {
Bob Badour6ea14572022-01-23 17:15:46 -0800123 if _, ok := ni.installHashLib[installPath]; !ok {
124 ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
125 ni.installHashLib[installPath][h] = make(map[string]struct{})
126 ni.installHashLib[installPath][h][libName] = struct{}{}
127 } else if _, ok = ni.installHashLib[installPath][h]; !ok {
128 ni.installHashLib[installPath][h] = make(map[string]struct{})
129 ni.installHashLib[installPath][h][libName] = struct{}{}
130 } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
131 ni.installHashLib[installPath][h][libName] = struct{}{}
Bob Badoure6fdd142021-12-09 22:10:43 -0800132 }
133 if _, ok := ni.hashLibInstall[h]; !ok {
134 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
135 ni.hashLibInstall[h][libName] = make(map[string]struct{})
136 ni.hashLibInstall[h][libName][installPath] = struct{}{}
137 } else if _, ok = ni.hashLibInstall[h][libName]; !ok {
138 ni.hashLibInstall[h][libName] = make(map[string]struct{})
139 ni.hashLibInstall[h][libName][installPath] = struct{}{}
140 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
141 ni.hashLibInstall[h][libName][installPath] = struct{}{}
142 }
143 }
144 }
145 }
146
147 // returns error from walk below.
148 var err error
149
150 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
151 if err != nil {
152 return false
153 }
154 if !ni.shipped.Contains(tn) {
155 return false
156 }
157 installPaths := getInstallPaths(tn, path)
158 var hashes map[hash]struct{}
159 hashes, err = index(tn)
160 if err != nil {
161 return false
162 }
163 link(ni.getLibName(tn), hashes, installPaths)
164 if tn.IsContainer() {
165 return true
166 }
167
168 for _, r := range rs.Resolutions(tn) {
169 hashes, err = index(r.actsOn)
170 if err != nil {
171 return false
172 }
173 link(ni.getLibName(r.actsOn), hashes, installPaths)
174 }
175 return false
176 })
177
178 if err != nil {
179 return nil, err
180 }
181
182 return ni, nil
183}
184
185// Hashes returns an ordered channel of the hashed license texts.
186func (ni *NoticeIndex) Hashes() chan hash {
187 c := make(chan hash)
188 go func() {
189 libs := make([]string, 0, len(ni.libHash))
190 for libName := range ni.libHash {
191 libs = append(libs, libName)
192 }
193 sort.Strings(libs)
194 hashes := make(map[hash]struct{})
195 for _, libName := range libs {
196 hl := make([]hash, 0, len(ni.libHash[libName]))
197 for h := range ni.libHash[libName] {
198 if _, ok := hashes[h]; ok {
199 continue
200 }
201 hashes[h] = struct{}{}
202 hl = append(hl, h)
203 }
204 if len(hl) > 0 {
Bob Badour6ea14572022-01-23 17:15:46 -0800205 sort.Sort(hashList{ni, libName, "", &hl})
Bob Badoure6fdd142021-12-09 22:10:43 -0800206 for _, h := range hl {
207 c <- h
208 }
209 }
210 }
211 close(c)
212 }()
213 return c
214}
215
Colin Crossbb45f8c2022-01-28 15:18:19 -0800216// InputNoticeFiles returns the list of files that were hashed during IndexLicenseTexts.
217func (ni *NoticeIndex) InputNoticeFiles() []string {
218 files := append([]string(nil), ni.files...)
219 sort.Strings(files)
220 return files
221}
222
Bob Badoure6fdd142021-12-09 22:10:43 -0800223// HashLibs returns the ordered array of library names using the license text
224// hashed as `h`.
225func (ni *NoticeIndex) HashLibs(h hash) []string {
226 libs := make([]string, 0, len(ni.hashLibInstall[h]))
227 for libName := range ni.hashLibInstall[h] {
228 libs = append(libs, libName)
229 }
230 sort.Strings(libs)
231 return libs
232}
233
234// HashLibInstalls returns the ordered array of install paths referencing
235// library `libName` using the license text hashed as `h`.
236func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
237 installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
238 for installPath := range ni.hashLibInstall[h][libName] {
239 installs = append(installs, installPath)
240 }
241 sort.Strings(installs)
242 return installs
243}
244
Bob Badour6ea14572022-01-23 17:15:46 -0800245// InstallPaths returns the ordered channel of indexed install paths.
246func (ni *NoticeIndex) InstallPaths() chan string {
247 c := make(chan string)
248 go func() {
249 paths := make([]string, 0, len(ni.installHashLib))
250 for path := range ni.installHashLib {
251 paths = append(paths, path)
252 }
253 sort.Strings(paths)
254 for _, installPath := range paths {
255 c <- installPath
256 }
257 close(c)
258 }()
259 return c
260}
261
262// InstallHashes returns the ordered array of hashes attached to `installPath`.
263func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
264 result := make([]hash, 0, len(ni.installHashLib[installPath]))
265 for h := range ni.installHashLib[installPath] {
266 result = append(result, h)
267 }
268 if len(result) > 0 {
269 sort.Sort(hashList{ni, "", installPath, &result})
270 }
271 return result
272}
273
274// InstallHashLibs returns the ordered array of library names attached to
275// `installPath` as hash `h`.
276func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
277 result := make([]string, 0, len(ni.installHashLib[installPath][h]))
278 for libName := range ni.installHashLib[installPath][h] {
279 result = append(result, libName)
280 }
281 sort.Strings(result)
282 return result
283}
284
Bob Badour00c8a382022-01-26 17:21:39 -0800285// Libraries returns the ordered channel of indexed library names.
286func (ni *NoticeIndex) Libraries() chan string {
287 c := make(chan string)
288 go func() {
289 libs := make([]string, 0, len(ni.libHash))
290 for lib := range ni.libHash {
291 libs = append(libs, lib)
292 }
293 sort.Strings(libs)
294 for _, lib := range libs {
295 c <- lib
296 }
297 close(c)
298 }()
299 return c
300}
301
Bob Badoure6fdd142021-12-09 22:10:43 -0800302// HashText returns the file content of the license text hashed as `h`.
303func (ni *NoticeIndex) HashText(h hash) []byte {
304 return ni.text[h]
305}
306
307// getLibName returns the name of the library associated with `noticeFor`.
308func (ni *NoticeIndex) getLibName(noticeFor *TargetNode) string {
309 // use name from METADATA if available
310 ln := ni.checkMetadata(noticeFor)
311 if len(ln) > 0 {
312 return ln
313 }
314 // use package_name: from license{} module if available
315 pn := noticeFor.PackageName()
316 if len(pn) > 0 {
317 return pn
318 }
319 for _, p := range noticeFor.Projects() {
320 if strings.HasPrefix(p, "prebuilts/") {
321 for _, licenseText := range noticeFor.LicenseTexts() {
322 if !strings.HasPrefix(licenseText, "prebuilts/") {
323 continue
324 }
325 for r, prefix := range SafePrebuiltPrefixes {
326 match := r.FindString(licenseText)
327 if len(match) == 0 {
328 continue
329 }
330 strip := SafePathPrefixes[prefix]
331 if strip {
332 // strip entire prefix
333 match = licenseText[len(match):]
334 } else {
335 // strip from prebuilts/ until safe prefix
336 match = licenseText[len(match)-len(prefix):]
337 }
338 // remove LICENSE or NOTICE or other filename
339 li := strings.LastIndex(match, "/")
340 if 0 < li {
341 match = match[:li]
342 }
343 // remove *licenses/ path segment and subdirectory if in path
344 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && 0 < offsets[len(offsets)-1][0] {
345 match = match[:offsets[len(offsets)-1][0]]
346 li = strings.LastIndex(match, "/")
347 if 0 < li {
348 match = match[:li]
349 }
350 }
351 return match
352 }
353 break
354 }
355 }
356 for prefix, strip := range SafePathPrefixes {
357 if strings.HasPrefix(p, prefix) {
358 if strip {
359 return p[len(prefix):]
360 } else {
361 return p
362 }
363 }
364 }
365 }
366 // strip off [./]meta_lic from license metadata path and extract base name
367 n := noticeFor.name[:len(noticeFor.name)-9]
368 li := strings.LastIndex(n, "/")
369 if 0 < li {
370 n = n[li+1:]
371 }
372 return n
373}
374
375// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
376func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
377 for _, p := range noticeFor.Projects() {
378 if name, ok := ni.projectName[p]; ok {
379 if name == noProjectName {
380 continue
381 }
382 return name
383 }
384 f, err := ni.rootFS.Open(filepath.Join(p, "METADATA"))
385 if err != nil {
386 ni.projectName[p] = noProjectName
387 continue
388 }
389 name := ""
390 description := ""
391 version := ""
392 s := bufio.NewScanner(f)
393 for s.Scan() {
394 line := s.Text()
395 m := nameRegexp.FindStringSubmatch(line)
396 if m != nil {
397 if 1 < len(m) && m[1] != "" {
398 name = m[1]
399 }
400 if version != "" {
401 break
402 }
403 continue
404 }
405 m = versionRegexp.FindStringSubmatch(line)
406 if m != nil {
407 if 1 < len(m) && m[1] != "" {
408 version = m[1]
409 }
410 if name != "" {
411 break
412 }
413 continue
414 }
415 m = descRegexp.FindStringSubmatch(line)
416 if m != nil {
417 if 1 < len(m) && m[1] != "" {
418 description = m[1]
419 }
420 }
421 }
422 _ = s.Err()
423 _ = f.Close()
424 if name != "" {
425 if version != "" {
426 if version[0] == 'v' || version[0] == 'V' {
427 ni.projectName[p] = name + "_" + version
428 } else {
429 ni.projectName[p] = name + "_v_" + version
430 }
431 } else {
432 ni.projectName[p] = name
433 }
434 return ni.projectName[p]
435 }
436 if description != "" {
437 ni.projectName[p] = description
438 return ni.projectName[p]
439 }
440 ni.projectName[p] = noProjectName
441 }
442 return ""
443}
444
445// addText reads and indexes the content of a license text file.
446func (ni *NoticeIndex) addText(file string) error {
447 f, err := ni.rootFS.Open(filepath.Clean(file))
448 if err != nil {
449 return fmt.Errorf("error opening license text file %q: %w", file, err)
450 }
451
452 // read the file
453 text, err := io.ReadAll(f)
454 if err != nil {
455 return fmt.Errorf("error reading license text file %q: %w", file, err)
456 }
457
458 hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
459 ni.hash[file] = hash
460 if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
461 ni.text[hash] = text
462 }
463
Colin Crossbb45f8c2022-01-28 15:18:19 -0800464 ni.files = append(ni.files, file)
465
Bob Badoure6fdd142021-12-09 22:10:43 -0800466 return nil
467}
468
469// getInstallPaths returns the names of the used dependencies mapped to their
470// installed locations.
471func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
472 if len(path) == 0 {
473 installs := attachesTo.Installed()
474 if 0 == len(installs) {
475 installs = attachesTo.Built()
476 }
477 return installs
478 }
479
480 var getInstalls func(path TargetEdgePath) []string
481
482 getInstalls = func(path TargetEdgePath) []string {
483 // deps contains the output targets from the dependencies in the path
484 var deps []string
485 if len(path) > 1 {
486 // recursively get the targets from the sub-path skipping 1 path segment
487 deps = getInstalls(path[1:])
488 } else {
489 // stop recursion at 1 path segment
490 deps = path[0].Dependency().TargetFiles()
491 }
492 size := 0
493 prefixes := path[0].Target().TargetFiles()
494 installMap := path[0].Target().InstallMap()
495 sources := path[0].Target().Sources()
496 for _, dep := range deps {
497 found := false
498 for _, source := range sources {
499 if strings.HasPrefix(dep, source) {
500 found = true
501 break
502 }
503 }
504 if !found {
505 continue
506 }
507 for _, im := range installMap {
508 if strings.HasPrefix(dep, im.FromPath) {
509 size += len(prefixes)
510 break
511 }
512 }
513 }
514
515 installs := make([]string, 0, size)
516 for _, dep := range deps {
517 found := false
518 for _, source := range sources {
519 if strings.HasPrefix(dep, source) {
520 found = true
521 break
522 }
523 }
524 if !found {
525 continue
526 }
527 for _, im := range installMap {
528 if strings.HasPrefix(dep, im.FromPath) {
529 for _, prefix := range prefixes {
530 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
531 }
532 break
533 }
534 }
535 }
536 return installs
537 }
538 allInstalls := getInstalls(path)
539 installs := path[0].Target().Installed()
540 if len(installs) == 0 {
541 return allInstalls
542 }
543 result := make([]string, 0, len(allInstalls))
544 for _, install := range allInstalls {
545 for _, prefix := range installs {
546 if strings.HasPrefix(install, prefix) {
547 result = append(result, install)
548 }
549 }
550 }
551 return result
552}
553
554// hash is an opaque string derived from md5sum.
555type hash struct {
556 key string
557}
558
559// String returns the hexadecimal representation of the hash.
560func (h hash) String() string {
561 return h.key
562}
563
564// hashList orders an array of hashes
565type hashList struct {
Bob Badour6ea14572022-01-23 17:15:46 -0800566 ni *NoticeIndex
567 libName string
568 installPath string
569 hashes *[]hash
Bob Badoure6fdd142021-12-09 22:10:43 -0800570}
571
572// Len returns the count of elements in the slice.
573func (l hashList) Len() int { return len(*l.hashes) }
574
575// Swap rearranges 2 elements of the slice so that each occupies the other's
576// former position.
577func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
578
579// Less returns true when the `i`th element is lexicographically less than
580// the `j`th element.
581func (l hashList) Less(i, j int) bool {
582 var insti, instj int
583 if 0 < len(l.libName) {
584 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
585 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
Bob Badour6ea14572022-01-23 17:15:46 -0800586 } else {
587 libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
588 libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
589 libsis := strings.Join(libsi, " ")
590 libsjs := strings.Join(libsj, " ")
591 if libsis != libsjs {
592 return libsis < libsjs
593 }
Bob Badoure6fdd142021-12-09 22:10:43 -0800594 }
595 if insti == instj {
596 leni := len(l.ni.text[(*l.hashes)[i]])
597 lenj := len(l.ni.text[(*l.hashes)[j]])
598 if leni == lenj {
599 // all else equal, just order by hash value
600 return (*l.hashes)[i].key < (*l.hashes)[j].key
601 }
602 // put shortest texts first within same # of installs
603 return leni < lenj
604 }
605 // reverse order of # installs so that most popular appears first
606 return instj < insti
607}