Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 1 | // Copyright 2021 Google LLC |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package compliance |
| 16 | |
| 17 | import ( |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 18 | "crypto/md5" |
| 19 | "fmt" |
| 20 | "io" |
| 21 | "io/fs" |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 22 | "net/url" |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 23 | "path/filepath" |
| 24 | "regexp" |
| 25 | "sort" |
| 26 | "strings" |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 27 | |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 28 | "android/soong/tools/compliance/projectmetadata" |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 29 | ) |
| 30 | |
| 31 | var ( |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 32 | licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`) |
| 33 | ) |
| 34 | |
| 35 | // NoticeIndex transforms license metadata into license text hashes, library |
| 36 | // names, and install paths indexing them for fast lookup/iteration. |
| 37 | type NoticeIndex struct { |
| 38 | // lg identifies the license graph to which the index applies. |
| 39 | lg *LicenseGraph |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 40 | // pmix indexes project metadata |
| 41 | pmix *projectmetadata.Index |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 42 | // rs identifies the set of resolutions upon which the index is based. |
| 43 | rs ResolutionSet |
| 44 | // shipped identifies the set of target nodes shipped directly or as derivative works. |
Bob Badour | 3fe369c | 2022-12-18 14:15:02 -0800 | [diff] [blame] | 45 | shipped TargetNodeSet |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 46 | // rootFS locates the root of the file system from which to read the files. |
| 47 | rootFS fs.FS |
| 48 | // hash maps license text filenames to content hashes |
| 49 | hash map[string]hash |
| 50 | // text maps content hashes to content |
| 51 | text map[hash][]byte |
| 52 | // hashLibInstall maps hashes to libraries to install paths. |
| 53 | hashLibInstall map[hash]map[string]map[string]struct{} |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 54 | // installHashLib maps install paths to libraries to hashes. |
| 55 | installHashLib map[string]map[hash]map[string]struct{} |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 56 | // libHash maps libraries to hashes. |
| 57 | libHash map[string]map[hash]struct{} |
| 58 | // targetHash maps target nodes to hashes. |
| 59 | targetHashes map[*TargetNode]map[hash]struct{} |
| 60 | // projectName maps project directory names to project name text. |
| 61 | projectName map[string]string |
Colin Cross | bb45f8c | 2022-01-28 15:18:19 -0800 | [diff] [blame] | 62 | // files lists all the files accessed during indexing |
| 63 | files []string |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 64 | } |
| 65 | |
| 66 | // IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs` |
| 67 | // using the files rooted at `rootFS`. |
| 68 | func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) { |
| 69 | if rs == nil { |
| 70 | rs = ResolveNotices(lg) |
| 71 | } |
| 72 | ni := &NoticeIndex{ |
Colin Cross | bb45f8c | 2022-01-28 15:18:19 -0800 | [diff] [blame] | 73 | lg: lg, |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 74 | pmix: projectmetadata.NewIndex(rootFS), |
Colin Cross | bb45f8c | 2022-01-28 15:18:19 -0800 | [diff] [blame] | 75 | rs: rs, |
| 76 | shipped: ShippedNodes(lg), |
| 77 | rootFS: rootFS, |
| 78 | hash: make(map[string]hash), |
| 79 | text: make(map[hash][]byte), |
| 80 | hashLibInstall: make(map[hash]map[string]map[string]struct{}), |
| 81 | installHashLib: make(map[string]map[hash]map[string]struct{}), |
| 82 | libHash: make(map[string]map[hash]struct{}), |
| 83 | targetHashes: make(map[*TargetNode]map[hash]struct{}), |
| 84 | projectName: make(map[string]string), |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 85 | } |
| 86 | |
| 87 | // index adds all license texts for `tn` to the index. |
| 88 | index := func(tn *TargetNode) (map[hash]struct{}, error) { |
| 89 | if hashes, ok := ni.targetHashes[tn]; ok { |
| 90 | return hashes, nil |
| 91 | } |
| 92 | hashes := make(map[hash]struct{}) |
| 93 | for _, text := range tn.LicenseTexts() { |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 94 | fname := strings.SplitN(text, ":", 2)[0] |
| 95 | if _, ok := ni.hash[fname]; !ok { |
| 96 | err := ni.addText(fname) |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 97 | if err != nil { |
| 98 | return nil, err |
| 99 | } |
| 100 | } |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 101 | hash := ni.hash[fname] |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 102 | if _, ok := hashes[hash]; !ok { |
| 103 | hashes[hash] = struct{}{} |
| 104 | } |
| 105 | } |
| 106 | ni.targetHashes[tn] = hashes |
| 107 | return hashes, nil |
| 108 | } |
| 109 | |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 110 | link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) error { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 111 | for h := range hashes { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 112 | libName, err := ni.getLibName(tn, h) |
| 113 | if err != nil { |
| 114 | return err |
| 115 | } |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 116 | if _, ok := ni.libHash[libName]; !ok { |
| 117 | ni.libHash[libName] = make(map[hash]struct{}) |
| 118 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 119 | if _, ok := ni.hashLibInstall[h]; !ok { |
| 120 | ni.hashLibInstall[h] = make(map[string]map[string]struct{}) |
| 121 | } |
| 122 | if _, ok := ni.libHash[libName][h]; !ok { |
| 123 | ni.libHash[libName][h] = struct{}{} |
| 124 | } |
| 125 | for _, installPath := range installPaths { |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 126 | if _, ok := ni.installHashLib[installPath]; !ok { |
| 127 | ni.installHashLib[installPath] = make(map[hash]map[string]struct{}) |
| 128 | ni.installHashLib[installPath][h] = make(map[string]struct{}) |
| 129 | ni.installHashLib[installPath][h][libName] = struct{}{} |
| 130 | } else if _, ok = ni.installHashLib[installPath][h]; !ok { |
| 131 | ni.installHashLib[installPath][h] = make(map[string]struct{}) |
| 132 | ni.installHashLib[installPath][h][libName] = struct{}{} |
| 133 | } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok { |
| 134 | ni.installHashLib[installPath][h][libName] = struct{}{} |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 135 | } |
| 136 | if _, ok := ni.hashLibInstall[h]; !ok { |
| 137 | ni.hashLibInstall[h] = make(map[string]map[string]struct{}) |
| 138 | ni.hashLibInstall[h][libName] = make(map[string]struct{}) |
| 139 | ni.hashLibInstall[h][libName][installPath] = struct{}{} |
| 140 | } else if _, ok = ni.hashLibInstall[h][libName]; !ok { |
| 141 | ni.hashLibInstall[h][libName] = make(map[string]struct{}) |
| 142 | ni.hashLibInstall[h][libName][installPath] = struct{}{} |
| 143 | } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok { |
| 144 | ni.hashLibInstall[h][libName][installPath] = struct{}{} |
| 145 | } |
| 146 | } |
| 147 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 148 | return nil |
| 149 | } |
| 150 | |
| 151 | cacheMetadata := func(tn *TargetNode) { |
| 152 | ni.pmix.MetadataForProjects(tn.Projects()...) |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 153 | } |
| 154 | |
| 155 | // returns error from walk below. |
| 156 | var err error |
| 157 | |
| 158 | WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool { |
| 159 | if err != nil { |
| 160 | return false |
| 161 | } |
| 162 | if !ni.shipped.Contains(tn) { |
| 163 | return false |
| 164 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 165 | go cacheMetadata(tn) |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 166 | installPaths := getInstallPaths(tn, path) |
| 167 | var hashes map[hash]struct{} |
| 168 | hashes, err = index(tn) |
| 169 | if err != nil { |
| 170 | return false |
| 171 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 172 | err = link(tn, hashes, installPaths) |
| 173 | if err != nil { |
| 174 | return false |
| 175 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 176 | if tn.IsContainer() { |
| 177 | return true |
| 178 | } |
| 179 | |
| 180 | for _, r := range rs.Resolutions(tn) { |
| 181 | hashes, err = index(r.actsOn) |
| 182 | if err != nil { |
| 183 | return false |
| 184 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 185 | err = link(r.actsOn, hashes, installPaths) |
| 186 | if err != nil { |
| 187 | return false |
| 188 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 189 | } |
| 190 | return false |
| 191 | }) |
| 192 | |
| 193 | if err != nil { |
| 194 | return nil, err |
| 195 | } |
| 196 | |
| 197 | return ni, nil |
| 198 | } |
| 199 | |
| 200 | // Hashes returns an ordered channel of the hashed license texts. |
| 201 | func (ni *NoticeIndex) Hashes() chan hash { |
| 202 | c := make(chan hash) |
| 203 | go func() { |
| 204 | libs := make([]string, 0, len(ni.libHash)) |
| 205 | for libName := range ni.libHash { |
| 206 | libs = append(libs, libName) |
| 207 | } |
| 208 | sort.Strings(libs) |
| 209 | hashes := make(map[hash]struct{}) |
| 210 | for _, libName := range libs { |
| 211 | hl := make([]hash, 0, len(ni.libHash[libName])) |
| 212 | for h := range ni.libHash[libName] { |
| 213 | if _, ok := hashes[h]; ok { |
| 214 | continue |
| 215 | } |
| 216 | hashes[h] = struct{}{} |
| 217 | hl = append(hl, h) |
| 218 | } |
| 219 | if len(hl) > 0 { |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 220 | sort.Sort(hashList{ni, libName, "", &hl}) |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 221 | for _, h := range hl { |
| 222 | c <- h |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | close(c) |
| 227 | }() |
| 228 | return c |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 229 | |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 230 | } |
| 231 | |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 232 | // InputFiles returns the complete list of files read during indexing. |
| 233 | func (ni *NoticeIndex) InputFiles() []string { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 234 | projectMeta := ni.pmix.AllMetadataFiles() |
Bob Badour | 3afc300 | 2022-11-02 10:24:04 -0700 | [diff] [blame] | 235 | files := make([]string, 0, len(ni.files) + len(ni.lg.targets) + len(projectMeta)) |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 236 | files = append(files, ni.files...) |
Bob Badour | 3afc300 | 2022-11-02 10:24:04 -0700 | [diff] [blame] | 237 | for f := range ni.lg.targets { |
| 238 | files = append(files, f) |
| 239 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 240 | files = append(files, projectMeta...) |
Colin Cross | bb45f8c | 2022-01-28 15:18:19 -0800 | [diff] [blame] | 241 | return files |
| 242 | } |
| 243 | |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 244 | // HashLibs returns the ordered array of library names using the license text |
| 245 | // hashed as `h`. |
| 246 | func (ni *NoticeIndex) HashLibs(h hash) []string { |
| 247 | libs := make([]string, 0, len(ni.hashLibInstall[h])) |
| 248 | for libName := range ni.hashLibInstall[h] { |
| 249 | libs = append(libs, libName) |
| 250 | } |
| 251 | sort.Strings(libs) |
| 252 | return libs |
| 253 | } |
| 254 | |
| 255 | // HashLibInstalls returns the ordered array of install paths referencing |
| 256 | // library `libName` using the license text hashed as `h`. |
| 257 | func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string { |
| 258 | installs := make([]string, 0, len(ni.hashLibInstall[h][libName])) |
| 259 | for installPath := range ni.hashLibInstall[h][libName] { |
| 260 | installs = append(installs, installPath) |
| 261 | } |
| 262 | sort.Strings(installs) |
| 263 | return installs |
| 264 | } |
| 265 | |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 266 | // InstallPaths returns the ordered channel of indexed install paths. |
| 267 | func (ni *NoticeIndex) InstallPaths() chan string { |
| 268 | c := make(chan string) |
| 269 | go func() { |
| 270 | paths := make([]string, 0, len(ni.installHashLib)) |
| 271 | for path := range ni.installHashLib { |
| 272 | paths = append(paths, path) |
| 273 | } |
| 274 | sort.Strings(paths) |
| 275 | for _, installPath := range paths { |
| 276 | c <- installPath |
| 277 | } |
| 278 | close(c) |
| 279 | }() |
| 280 | return c |
| 281 | } |
| 282 | |
| 283 | // InstallHashes returns the ordered array of hashes attached to `installPath`. |
| 284 | func (ni *NoticeIndex) InstallHashes(installPath string) []hash { |
| 285 | result := make([]hash, 0, len(ni.installHashLib[installPath])) |
| 286 | for h := range ni.installHashLib[installPath] { |
| 287 | result = append(result, h) |
| 288 | } |
| 289 | if len(result) > 0 { |
| 290 | sort.Sort(hashList{ni, "", installPath, &result}) |
| 291 | } |
| 292 | return result |
| 293 | } |
| 294 | |
| 295 | // InstallHashLibs returns the ordered array of library names attached to |
| 296 | // `installPath` as hash `h`. |
| 297 | func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string { |
| 298 | result := make([]string, 0, len(ni.installHashLib[installPath][h])) |
| 299 | for libName := range ni.installHashLib[installPath][h] { |
| 300 | result = append(result, libName) |
| 301 | } |
| 302 | sort.Strings(result) |
| 303 | return result |
| 304 | } |
| 305 | |
Bob Badour | 00c8a38 | 2022-01-26 17:21:39 -0800 | [diff] [blame] | 306 | // Libraries returns the ordered channel of indexed library names. |
| 307 | func (ni *NoticeIndex) Libraries() chan string { |
| 308 | c := make(chan string) |
| 309 | go func() { |
| 310 | libs := make([]string, 0, len(ni.libHash)) |
| 311 | for lib := range ni.libHash { |
| 312 | libs = append(libs, lib) |
| 313 | } |
| 314 | sort.Strings(libs) |
| 315 | for _, lib := range libs { |
| 316 | c <- lib |
| 317 | } |
| 318 | close(c) |
| 319 | }() |
| 320 | return c |
| 321 | } |
| 322 | |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 323 | // HashText returns the file content of the license text hashed as `h`. |
| 324 | func (ni *NoticeIndex) HashText(h hash) []byte { |
| 325 | return ni.text[h] |
| 326 | } |
| 327 | |
| 328 | // getLibName returns the name of the library associated with `noticeFor`. |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 329 | func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) (string, error) { |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 330 | for _, text := range noticeFor.LicenseTexts() { |
| 331 | if !strings.Contains(text, ":") { |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 332 | if ni.hash[text].key != h.key { |
| 333 | continue |
| 334 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 335 | ln, err := ni.checkMetadataForLicenseText(noticeFor, text) |
| 336 | if err != nil { |
| 337 | return "", err |
| 338 | } |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 339 | if len(ln) > 0 { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 340 | return ln, nil |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 341 | } |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 342 | continue |
| 343 | } |
| 344 | |
| 345 | fields := strings.SplitN(text, ":", 2) |
| 346 | fname, pname := fields[0], fields[1] |
| 347 | if ni.hash[fname].key != h.key { |
| 348 | continue |
| 349 | } |
| 350 | |
| 351 | ln, err := url.QueryUnescape(pname) |
| 352 | if err != nil { |
| 353 | continue |
| 354 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 355 | return ln, nil |
Bob Badour | 5028abc | 2022-02-09 11:56:58 -0800 | [diff] [blame] | 356 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 357 | // use name from METADATA if available |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 358 | ln, err := ni.checkMetadata(noticeFor) |
| 359 | if err != nil { |
| 360 | return "", err |
| 361 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 362 | if len(ln) > 0 { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 363 | return ln, nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 364 | } |
| 365 | // use package_name: from license{} module if available |
| 366 | pn := noticeFor.PackageName() |
| 367 | if len(pn) > 0 { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 368 | return pn, nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 369 | } |
| 370 | for _, p := range noticeFor.Projects() { |
| 371 | if strings.HasPrefix(p, "prebuilts/") { |
| 372 | for _, licenseText := range noticeFor.LicenseTexts() { |
| 373 | if !strings.HasPrefix(licenseText, "prebuilts/") { |
| 374 | continue |
| 375 | } |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 376 | if !strings.Contains(licenseText, ":") { |
| 377 | if ni.hash[licenseText].key != h.key { |
| 378 | continue |
| 379 | } |
| 380 | } else { |
| 381 | fields := strings.SplitN(licenseText, ":", 2) |
| 382 | fname := fields[0] |
| 383 | if ni.hash[fname].key != h.key { |
| 384 | continue |
| 385 | } |
| 386 | } |
Colin Cross | 4b54525 | 2022-09-28 15:36:53 -0700 | [diff] [blame] | 387 | for _, safePrebuiltPrefix := range safePrebuiltPrefixes { |
| 388 | match := safePrebuiltPrefix.re.FindString(licenseText) |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 389 | if len(match) == 0 { |
| 390 | continue |
| 391 | } |
Colin Cross | 4b54525 | 2022-09-28 15:36:53 -0700 | [diff] [blame] | 392 | if safePrebuiltPrefix.strip { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 393 | // strip entire prefix |
| 394 | match = licenseText[len(match):] |
| 395 | } else { |
| 396 | // strip from prebuilts/ until safe prefix |
Colin Cross | 4b54525 | 2022-09-28 15:36:53 -0700 | [diff] [blame] | 397 | match = licenseText[len(match)-len(safePrebuiltPrefix.prefix):] |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 398 | } |
| 399 | // remove LICENSE or NOTICE or other filename |
| 400 | li := strings.LastIndex(match, "/") |
Bob Badour | e9b38c1 | 2022-02-09 15:56:59 -0800 | [diff] [blame] | 401 | if li > 0 { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 402 | match = match[:li] |
| 403 | } |
| 404 | // remove *licenses/ path segment and subdirectory if in path |
Bob Badour | e9b38c1 | 2022-02-09 15:56:59 -0800 | [diff] [blame] | 405 | if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 406 | match = match[:offsets[len(offsets)-1][0]] |
| 407 | li = strings.LastIndex(match, "/") |
Bob Badour | e9b38c1 | 2022-02-09 15:56:59 -0800 | [diff] [blame] | 408 | if li > 0 { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 409 | match = match[:li] |
| 410 | } |
| 411 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 412 | return match, nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 413 | } |
| 414 | break |
| 415 | } |
| 416 | } |
Colin Cross | 4b54525 | 2022-09-28 15:36:53 -0700 | [diff] [blame] | 417 | for _, safePathPrefix := range safePathPrefixes { |
| 418 | if strings.HasPrefix(p, safePathPrefix.prefix) { |
| 419 | if safePathPrefix.strip { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 420 | return p[len(safePathPrefix.prefix):], nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 421 | } else { |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 422 | return p, nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 423 | } |
| 424 | } |
| 425 | } |
| 426 | } |
| 427 | // strip off [./]meta_lic from license metadata path and extract base name |
| 428 | n := noticeFor.name[:len(noticeFor.name)-9] |
| 429 | li := strings.LastIndex(n, "/") |
Bob Badour | e9b38c1 | 2022-02-09 15:56:59 -0800 | [diff] [blame] | 430 | if li > 0 { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 431 | n = n[li+1:] |
| 432 | } |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 433 | fi := strings.Index(n, "@") |
| 434 | if fi > 0 { |
| 435 | n = n[:fi] |
| 436 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 437 | return n, nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 438 | } |
| 439 | |
| 440 | // checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`. |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 441 | func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) (string, error) { |
| 442 | pms, err := ni.pmix.MetadataForProjects(noticeFor.Projects()...) |
| 443 | if err != nil { |
| 444 | return "", err |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 445 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 446 | for _, pm := range pms { |
| 447 | name := pm.VersionedName() |
| 448 | if name != "" { |
| 449 | return name, nil |
| 450 | } |
| 451 | } |
| 452 | return "", nil |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 453 | } |
| 454 | |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 455 | // checkMetadataForLicenseText |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 456 | func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) (string, error) { |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 457 | p := "" |
| 458 | for _, proj := range noticeFor.Projects() { |
| 459 | if strings.HasPrefix(licenseText, proj) { |
| 460 | p = proj |
| 461 | } |
| 462 | } |
| 463 | if len(p) == 0 { |
| 464 | p = filepath.Dir(licenseText) |
| 465 | for { |
| 466 | fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git")) |
| 467 | if err == nil && fi.IsDir() { |
| 468 | break |
| 469 | } |
| 470 | if strings.Contains(p, "/") && p != "/" { |
| 471 | p = filepath.Dir(p) |
| 472 | continue |
| 473 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 474 | return "", nil |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 475 | } |
| 476 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 477 | pms, err := ni.pmix.MetadataForProjects(p) |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 478 | if err != nil { |
| 479 | return "", err |
| 480 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 481 | if pms == nil { |
| 482 | return "", nil |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 483 | } |
Bob Badour | ab5cfbd | 2022-10-17 17:40:04 -0700 | [diff] [blame] | 484 | return pms[0].VersionedName(), nil |
Bob Badour | 7757005 | 2022-02-28 20:05:44 -0800 | [diff] [blame] | 485 | } |
| 486 | |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 487 | // addText reads and indexes the content of a license text file. |
| 488 | func (ni *NoticeIndex) addText(file string) error { |
| 489 | f, err := ni.rootFS.Open(filepath.Clean(file)) |
| 490 | if err != nil { |
| 491 | return fmt.Errorf("error opening license text file %q: %w", file, err) |
| 492 | } |
| 493 | |
| 494 | // read the file |
| 495 | text, err := io.ReadAll(f) |
| 496 | if err != nil { |
| 497 | return fmt.Errorf("error reading license text file %q: %w", file, err) |
| 498 | } |
| 499 | |
| 500 | hash := hash{fmt.Sprintf("%x", md5.Sum(text))} |
| 501 | ni.hash[file] = hash |
| 502 | if _, alreadyPresent := ni.text[hash]; !alreadyPresent { |
| 503 | ni.text[hash] = text |
| 504 | } |
| 505 | |
Colin Cross | bb45f8c | 2022-01-28 15:18:19 -0800 | [diff] [blame] | 506 | ni.files = append(ni.files, file) |
| 507 | |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 508 | return nil |
| 509 | } |
| 510 | |
| 511 | // getInstallPaths returns the names of the used dependencies mapped to their |
| 512 | // installed locations. |
| 513 | func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string { |
| 514 | if len(path) == 0 { |
| 515 | installs := attachesTo.Installed() |
| 516 | if 0 == len(installs) { |
| 517 | installs = attachesTo.Built() |
| 518 | } |
| 519 | return installs |
| 520 | } |
| 521 | |
| 522 | var getInstalls func(path TargetEdgePath) []string |
| 523 | |
| 524 | getInstalls = func(path TargetEdgePath) []string { |
| 525 | // deps contains the output targets from the dependencies in the path |
| 526 | var deps []string |
| 527 | if len(path) > 1 { |
| 528 | // recursively get the targets from the sub-path skipping 1 path segment |
| 529 | deps = getInstalls(path[1:]) |
| 530 | } else { |
| 531 | // stop recursion at 1 path segment |
| 532 | deps = path[0].Dependency().TargetFiles() |
| 533 | } |
| 534 | size := 0 |
| 535 | prefixes := path[0].Target().TargetFiles() |
| 536 | installMap := path[0].Target().InstallMap() |
| 537 | sources := path[0].Target().Sources() |
| 538 | for _, dep := range deps { |
| 539 | found := false |
| 540 | for _, source := range sources { |
| 541 | if strings.HasPrefix(dep, source) { |
| 542 | found = true |
| 543 | break |
| 544 | } |
| 545 | } |
| 546 | if !found { |
| 547 | continue |
| 548 | } |
| 549 | for _, im := range installMap { |
| 550 | if strings.HasPrefix(dep, im.FromPath) { |
| 551 | size += len(prefixes) |
| 552 | break |
| 553 | } |
| 554 | } |
| 555 | } |
| 556 | |
| 557 | installs := make([]string, 0, size) |
| 558 | for _, dep := range deps { |
| 559 | found := false |
| 560 | for _, source := range sources { |
| 561 | if strings.HasPrefix(dep, source) { |
| 562 | found = true |
| 563 | break |
| 564 | } |
| 565 | } |
| 566 | if !found { |
| 567 | continue |
| 568 | } |
| 569 | for _, im := range installMap { |
| 570 | if strings.HasPrefix(dep, im.FromPath) { |
| 571 | for _, prefix := range prefixes { |
| 572 | installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):]) |
| 573 | } |
| 574 | break |
| 575 | } |
| 576 | } |
| 577 | } |
| 578 | return installs |
| 579 | } |
| 580 | allInstalls := getInstalls(path) |
| 581 | installs := path[0].Target().Installed() |
| 582 | if len(installs) == 0 { |
| 583 | return allInstalls |
| 584 | } |
| 585 | result := make([]string, 0, len(allInstalls)) |
| 586 | for _, install := range allInstalls { |
| 587 | for _, prefix := range installs { |
| 588 | if strings.HasPrefix(install, prefix) { |
| 589 | result = append(result, install) |
| 590 | } |
| 591 | } |
| 592 | } |
| 593 | return result |
| 594 | } |
| 595 | |
| 596 | // hash is an opaque string derived from md5sum. |
| 597 | type hash struct { |
| 598 | key string |
| 599 | } |
| 600 | |
| 601 | // String returns the hexadecimal representation of the hash. |
| 602 | func (h hash) String() string { |
| 603 | return h.key |
| 604 | } |
| 605 | |
| 606 | // hashList orders an array of hashes |
| 607 | type hashList struct { |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 608 | ni *NoticeIndex |
| 609 | libName string |
| 610 | installPath string |
| 611 | hashes *[]hash |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 612 | } |
| 613 | |
| 614 | // Len returns the count of elements in the slice. |
| 615 | func (l hashList) Len() int { return len(*l.hashes) } |
| 616 | |
| 617 | // Swap rearranges 2 elements of the slice so that each occupies the other's |
| 618 | // former position. |
| 619 | func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] } |
| 620 | |
| 621 | // Less returns true when the `i`th element is lexicographically less than |
| 622 | // the `j`th element. |
| 623 | func (l hashList) Less(i, j int) bool { |
| 624 | var insti, instj int |
Bob Badour | e9b38c1 | 2022-02-09 15:56:59 -0800 | [diff] [blame] | 625 | if len(l.libName) > 0 { |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 626 | insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName]) |
| 627 | instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName]) |
Bob Badour | 6ea1457 | 2022-01-23 17:15:46 -0800 | [diff] [blame] | 628 | } else { |
| 629 | libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i]) |
| 630 | libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j]) |
| 631 | libsis := strings.Join(libsi, " ") |
| 632 | libsjs := strings.Join(libsj, " ") |
| 633 | if libsis != libsjs { |
| 634 | return libsis < libsjs |
| 635 | } |
Bob Badour | e6fdd14 | 2021-12-09 22:10:43 -0800 | [diff] [blame] | 636 | } |
| 637 | if insti == instj { |
| 638 | leni := len(l.ni.text[(*l.hashes)[i]]) |
| 639 | lenj := len(l.ni.text[(*l.hashes)[j]]) |
| 640 | if leni == lenj { |
| 641 | // all else equal, just order by hash value |
| 642 | return (*l.hashes)[i].key < (*l.hashes)[j].key |
| 643 | } |
| 644 | // put shortest texts first within same # of installs |
| 645 | return leni < lenj |
| 646 | } |
| 647 | // reverse order of # installs so that most popular appears first |
| 648 | return instj < insti |
| 649 | } |