blob: 5b7f37655f4d47955708a76db0106e5ea7c10262 [file] [log] [blame]
Bob Badoure6fdd142021-12-09 22:10:43 -08001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "bufio"
19 "crypto/md5"
20 "fmt"
21 "io"
22 "io/fs"
23 "path/filepath"
24 "regexp"
25 "sort"
26 "strings"
27)
28
29const (
30 noProjectName = "\u2205"
31)
32
33var (
34 nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
35 descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
36 versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
37 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
38)
39
40// NoticeIndex transforms license metadata into license text hashes, library
41// names, and install paths indexing them for fast lookup/iteration.
42type NoticeIndex struct {
43 // lg identifies the license graph to which the index applies.
44 lg *LicenseGraph
45 // rs identifies the set of resolutions upon which the index is based.
46 rs ResolutionSet
47 // shipped identifies the set of target nodes shipped directly or as derivative works.
48 shipped *TargetNodeSet
49 // rootFS locates the root of the file system from which to read the files.
50 rootFS fs.FS
51 // hash maps license text filenames to content hashes
52 hash map[string]hash
53 // text maps content hashes to content
54 text map[hash][]byte
55 // hashLibInstall maps hashes to libraries to install paths.
56 hashLibInstall map[hash]map[string]map[string]struct{}
Bob Badour6ea14572022-01-23 17:15:46 -080057 // installHashLib maps install paths to libraries to hashes.
58 installHashLib map[string]map[hash]map[string]struct{}
Bob Badoure6fdd142021-12-09 22:10:43 -080059 // libHash maps libraries to hashes.
60 libHash map[string]map[hash]struct{}
61 // targetHash maps target nodes to hashes.
62 targetHashes map[*TargetNode]map[hash]struct{}
63 // projectName maps project directory names to project name text.
64 projectName map[string]string
65}
66
67// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
68// using the files rooted at `rootFS`.
69func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
70 if rs == nil {
71 rs = ResolveNotices(lg)
72 }
73 ni := &NoticeIndex{
74 lg, rs, ShippedNodes(lg), rootFS,
75 make(map[string]hash),
76 make(map[hash][]byte),
77 make(map[hash]map[string]map[string]struct{}),
Bob Badour6ea14572022-01-23 17:15:46 -080078 make(map[string]map[hash]map[string]struct{}),
Bob Badoure6fdd142021-12-09 22:10:43 -080079 make(map[string]map[hash]struct{}),
80 make(map[*TargetNode]map[hash]struct{}),
81 make(map[string]string),
82 }
83
84 // index adds all license texts for `tn` to the index.
85 index := func(tn *TargetNode) (map[hash]struct{}, error) {
86 if hashes, ok := ni.targetHashes[tn]; ok {
87 return hashes, nil
88 }
89 hashes := make(map[hash]struct{})
90 for _, text := range tn.LicenseTexts() {
91 if _, ok := ni.hash[text]; !ok {
92 err := ni.addText(text)
93 if err != nil {
94 return nil, err
95 }
96 }
97 hash := ni.hash[text]
98 if _, ok := hashes[hash]; !ok {
99 hashes[hash] = struct{}{}
100 }
101 }
102 ni.targetHashes[tn] = hashes
103 return hashes, nil
104 }
105
106 link := func(libName string, hashes map[hash]struct{}, installPaths []string) {
107 if _, ok := ni.libHash[libName]; !ok {
108 ni.libHash[libName] = make(map[hash]struct{})
109 }
110 for h := range hashes {
111 if _, ok := ni.hashLibInstall[h]; !ok {
112 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
113 }
114 if _, ok := ni.libHash[libName][h]; !ok {
115 ni.libHash[libName][h] = struct{}{}
116 }
117 for _, installPath := range installPaths {
Bob Badour6ea14572022-01-23 17:15:46 -0800118 if _, ok := ni.installHashLib[installPath]; !ok {
119 ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
120 ni.installHashLib[installPath][h] = make(map[string]struct{})
121 ni.installHashLib[installPath][h][libName] = struct{}{}
122 } else if _, ok = ni.installHashLib[installPath][h]; !ok {
123 ni.installHashLib[installPath][h] = make(map[string]struct{})
124 ni.installHashLib[installPath][h][libName] = struct{}{}
125 } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
126 ni.installHashLib[installPath][h][libName] = struct{}{}
Bob Badoure6fdd142021-12-09 22:10:43 -0800127 }
128 if _, ok := ni.hashLibInstall[h]; !ok {
129 ni.hashLibInstall[h] = make(map[string]map[string]struct{})
130 ni.hashLibInstall[h][libName] = make(map[string]struct{})
131 ni.hashLibInstall[h][libName][installPath] = struct{}{}
132 } else if _, ok = ni.hashLibInstall[h][libName]; !ok {
133 ni.hashLibInstall[h][libName] = make(map[string]struct{})
134 ni.hashLibInstall[h][libName][installPath] = struct{}{}
135 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
136 ni.hashLibInstall[h][libName][installPath] = struct{}{}
137 }
138 }
139 }
140 }
141
142 // returns error from walk below.
143 var err error
144
145 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
146 if err != nil {
147 return false
148 }
149 if !ni.shipped.Contains(tn) {
150 return false
151 }
152 installPaths := getInstallPaths(tn, path)
153 var hashes map[hash]struct{}
154 hashes, err = index(tn)
155 if err != nil {
156 return false
157 }
158 link(ni.getLibName(tn), hashes, installPaths)
159 if tn.IsContainer() {
160 return true
161 }
162
163 for _, r := range rs.Resolutions(tn) {
164 hashes, err = index(r.actsOn)
165 if err != nil {
166 return false
167 }
168 link(ni.getLibName(r.actsOn), hashes, installPaths)
169 }
170 return false
171 })
172
173 if err != nil {
174 return nil, err
175 }
176
177 return ni, nil
178}
179
180// Hashes returns an ordered channel of the hashed license texts.
181func (ni *NoticeIndex) Hashes() chan hash {
182 c := make(chan hash)
183 go func() {
184 libs := make([]string, 0, len(ni.libHash))
185 for libName := range ni.libHash {
186 libs = append(libs, libName)
187 }
188 sort.Strings(libs)
189 hashes := make(map[hash]struct{})
190 for _, libName := range libs {
191 hl := make([]hash, 0, len(ni.libHash[libName]))
192 for h := range ni.libHash[libName] {
193 if _, ok := hashes[h]; ok {
194 continue
195 }
196 hashes[h] = struct{}{}
197 hl = append(hl, h)
198 }
199 if len(hl) > 0 {
Bob Badour6ea14572022-01-23 17:15:46 -0800200 sort.Sort(hashList{ni, libName, "", &hl})
Bob Badoure6fdd142021-12-09 22:10:43 -0800201 for _, h := range hl {
202 c <- h
203 }
204 }
205 }
206 close(c)
207 }()
208 return c
209}
210
211// HashLibs returns the ordered array of library names using the license text
212// hashed as `h`.
213func (ni *NoticeIndex) HashLibs(h hash) []string {
214 libs := make([]string, 0, len(ni.hashLibInstall[h]))
215 for libName := range ni.hashLibInstall[h] {
216 libs = append(libs, libName)
217 }
218 sort.Strings(libs)
219 return libs
220}
221
222// HashLibInstalls returns the ordered array of install paths referencing
223// library `libName` using the license text hashed as `h`.
224func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
225 installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
226 for installPath := range ni.hashLibInstall[h][libName] {
227 installs = append(installs, installPath)
228 }
229 sort.Strings(installs)
230 return installs
231}
232
Bob Badour6ea14572022-01-23 17:15:46 -0800233// InstallPaths returns the ordered channel of indexed install paths.
234func (ni *NoticeIndex) InstallPaths() chan string {
235 c := make(chan string)
236 go func() {
237 paths := make([]string, 0, len(ni.installHashLib))
238 for path := range ni.installHashLib {
239 paths = append(paths, path)
240 }
241 sort.Strings(paths)
242 for _, installPath := range paths {
243 c <- installPath
244 }
245 close(c)
246 }()
247 return c
248}
249
250// InstallHashes returns the ordered array of hashes attached to `installPath`.
251func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
252 result := make([]hash, 0, len(ni.installHashLib[installPath]))
253 for h := range ni.installHashLib[installPath] {
254 result = append(result, h)
255 }
256 if len(result) > 0 {
257 sort.Sort(hashList{ni, "", installPath, &result})
258 }
259 return result
260}
261
262// InstallHashLibs returns the ordered array of library names attached to
263// `installPath` as hash `h`.
264func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
265 result := make([]string, 0, len(ni.installHashLib[installPath][h]))
266 for libName := range ni.installHashLib[installPath][h] {
267 result = append(result, libName)
268 }
269 sort.Strings(result)
270 return result
271}
272
Bob Badoure6fdd142021-12-09 22:10:43 -0800273// HashText returns the file content of the license text hashed as `h`.
274func (ni *NoticeIndex) HashText(h hash) []byte {
275 return ni.text[h]
276}
277
278// getLibName returns the name of the library associated with `noticeFor`.
279func (ni *NoticeIndex) getLibName(noticeFor *TargetNode) string {
280 // use name from METADATA if available
281 ln := ni.checkMetadata(noticeFor)
282 if len(ln) > 0 {
283 return ln
284 }
285 // use package_name: from license{} module if available
286 pn := noticeFor.PackageName()
287 if len(pn) > 0 {
288 return pn
289 }
290 for _, p := range noticeFor.Projects() {
291 if strings.HasPrefix(p, "prebuilts/") {
292 for _, licenseText := range noticeFor.LicenseTexts() {
293 if !strings.HasPrefix(licenseText, "prebuilts/") {
294 continue
295 }
296 for r, prefix := range SafePrebuiltPrefixes {
297 match := r.FindString(licenseText)
298 if len(match) == 0 {
299 continue
300 }
301 strip := SafePathPrefixes[prefix]
302 if strip {
303 // strip entire prefix
304 match = licenseText[len(match):]
305 } else {
306 // strip from prebuilts/ until safe prefix
307 match = licenseText[len(match)-len(prefix):]
308 }
309 // remove LICENSE or NOTICE or other filename
310 li := strings.LastIndex(match, "/")
311 if 0 < li {
312 match = match[:li]
313 }
314 // remove *licenses/ path segment and subdirectory if in path
315 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && 0 < offsets[len(offsets)-1][0] {
316 match = match[:offsets[len(offsets)-1][0]]
317 li = strings.LastIndex(match, "/")
318 if 0 < li {
319 match = match[:li]
320 }
321 }
322 return match
323 }
324 break
325 }
326 }
327 for prefix, strip := range SafePathPrefixes {
328 if strings.HasPrefix(p, prefix) {
329 if strip {
330 return p[len(prefix):]
331 } else {
332 return p
333 }
334 }
335 }
336 }
337 // strip off [./]meta_lic from license metadata path and extract base name
338 n := noticeFor.name[:len(noticeFor.name)-9]
339 li := strings.LastIndex(n, "/")
340 if 0 < li {
341 n = n[li+1:]
342 }
343 return n
344}
345
346// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
347func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
348 for _, p := range noticeFor.Projects() {
349 if name, ok := ni.projectName[p]; ok {
350 if name == noProjectName {
351 continue
352 }
353 return name
354 }
355 f, err := ni.rootFS.Open(filepath.Join(p, "METADATA"))
356 if err != nil {
357 ni.projectName[p] = noProjectName
358 continue
359 }
360 name := ""
361 description := ""
362 version := ""
363 s := bufio.NewScanner(f)
364 for s.Scan() {
365 line := s.Text()
366 m := nameRegexp.FindStringSubmatch(line)
367 if m != nil {
368 if 1 < len(m) && m[1] != "" {
369 name = m[1]
370 }
371 if version != "" {
372 break
373 }
374 continue
375 }
376 m = versionRegexp.FindStringSubmatch(line)
377 if m != nil {
378 if 1 < len(m) && m[1] != "" {
379 version = m[1]
380 }
381 if name != "" {
382 break
383 }
384 continue
385 }
386 m = descRegexp.FindStringSubmatch(line)
387 if m != nil {
388 if 1 < len(m) && m[1] != "" {
389 description = m[1]
390 }
391 }
392 }
393 _ = s.Err()
394 _ = f.Close()
395 if name != "" {
396 if version != "" {
397 if version[0] == 'v' || version[0] == 'V' {
398 ni.projectName[p] = name + "_" + version
399 } else {
400 ni.projectName[p] = name + "_v_" + version
401 }
402 } else {
403 ni.projectName[p] = name
404 }
405 return ni.projectName[p]
406 }
407 if description != "" {
408 ni.projectName[p] = description
409 return ni.projectName[p]
410 }
411 ni.projectName[p] = noProjectName
412 }
413 return ""
414}
415
416// addText reads and indexes the content of a license text file.
417func (ni *NoticeIndex) addText(file string) error {
418 f, err := ni.rootFS.Open(filepath.Clean(file))
419 if err != nil {
420 return fmt.Errorf("error opening license text file %q: %w", file, err)
421 }
422
423 // read the file
424 text, err := io.ReadAll(f)
425 if err != nil {
426 return fmt.Errorf("error reading license text file %q: %w", file, err)
427 }
428
429 hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
430 ni.hash[file] = hash
431 if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
432 ni.text[hash] = text
433 }
434
435 return nil
436}
437
438// getInstallPaths returns the names of the used dependencies mapped to their
439// installed locations.
440func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
441 if len(path) == 0 {
442 installs := attachesTo.Installed()
443 if 0 == len(installs) {
444 installs = attachesTo.Built()
445 }
446 return installs
447 }
448
449 var getInstalls func(path TargetEdgePath) []string
450
451 getInstalls = func(path TargetEdgePath) []string {
452 // deps contains the output targets from the dependencies in the path
453 var deps []string
454 if len(path) > 1 {
455 // recursively get the targets from the sub-path skipping 1 path segment
456 deps = getInstalls(path[1:])
457 } else {
458 // stop recursion at 1 path segment
459 deps = path[0].Dependency().TargetFiles()
460 }
461 size := 0
462 prefixes := path[0].Target().TargetFiles()
463 installMap := path[0].Target().InstallMap()
464 sources := path[0].Target().Sources()
465 for _, dep := range deps {
466 found := false
467 for _, source := range sources {
468 if strings.HasPrefix(dep, source) {
469 found = true
470 break
471 }
472 }
473 if !found {
474 continue
475 }
476 for _, im := range installMap {
477 if strings.HasPrefix(dep, im.FromPath) {
478 size += len(prefixes)
479 break
480 }
481 }
482 }
483
484 installs := make([]string, 0, size)
485 for _, dep := range deps {
486 found := false
487 for _, source := range sources {
488 if strings.HasPrefix(dep, source) {
489 found = true
490 break
491 }
492 }
493 if !found {
494 continue
495 }
496 for _, im := range installMap {
497 if strings.HasPrefix(dep, im.FromPath) {
498 for _, prefix := range prefixes {
499 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
500 }
501 break
502 }
503 }
504 }
505 return installs
506 }
507 allInstalls := getInstalls(path)
508 installs := path[0].Target().Installed()
509 if len(installs) == 0 {
510 return allInstalls
511 }
512 result := make([]string, 0, len(allInstalls))
513 for _, install := range allInstalls {
514 for _, prefix := range installs {
515 if strings.HasPrefix(install, prefix) {
516 result = append(result, install)
517 }
518 }
519 }
520 return result
521}
522
523// hash is an opaque string derived from md5sum.
524type hash struct {
525 key string
526}
527
528// String returns the hexadecimal representation of the hash.
529func (h hash) String() string {
530 return h.key
531}
532
533// hashList orders an array of hashes
534type hashList struct {
Bob Badour6ea14572022-01-23 17:15:46 -0800535 ni *NoticeIndex
536 libName string
537 installPath string
538 hashes *[]hash
Bob Badoure6fdd142021-12-09 22:10:43 -0800539}
540
541// Len returns the count of elements in the slice.
542func (l hashList) Len() int { return len(*l.hashes) }
543
544// Swap rearranges 2 elements of the slice so that each occupies the other's
545// former position.
546func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
547
548// Less returns true when the `i`th element is lexicographically less than
549// the `j`th element.
550func (l hashList) Less(i, j int) bool {
551 var insti, instj int
552 if 0 < len(l.libName) {
553 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
554 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
Bob Badour6ea14572022-01-23 17:15:46 -0800555 } else {
556 libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
557 libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
558 libsis := strings.Join(libsi, " ")
559 libsjs := strings.Join(libsj, " ")
560 if libsis != libsjs {
561 return libsis < libsjs
562 }
Bob Badoure6fdd142021-12-09 22:10:43 -0800563 }
564 if insti == instj {
565 leni := len(l.ni.text[(*l.hashes)[i]])
566 lenj := len(l.ni.text[(*l.hashes)[j]])
567 if leni == lenj {
568 // all else equal, just order by hash value
569 return (*l.hashes)[i].key < (*l.hashes)[j].key
570 }
571 // put shortest texts first within same # of installs
572 return leni < lenj
573 }
574 // reverse order of # installs so that most popular appears first
575 return instj < insti
576}