compliance package structures for license metadata
package to read, consume, and analyze license metadata and dependency
graph.
Bug: 68860345
Bug: 151177513
Bug: 151953481
Change-Id: I3ebf44e4d5195b9851fd076161049bf82ed76dd2
diff --git a/tools/compliance/readgraph.go b/tools/compliance/readgraph.go
new file mode 100644
index 0000000..0b5ebfe
--- /dev/null
+++ b/tools/compliance/readgraph.go
@@ -0,0 +1,259 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compliance
+
+import (
+ "fmt"
+ "io"
+ "io/fs"
+ "strings"
+ "sync"
+
+ "android/soong/compliance/license_metadata_proto"
+
+ "google.golang.org/protobuf/encoding/prototext"
+)
+
+var (
+ // ConcurrentReaders is the size of the task pool for limiting resource usage e.g. open files.
+ ConcurrentReaders = 5
+)
+
+// result describes the outcome of reading and parsing a single license metadata file.
+type result struct {
+ // file identifies the path to the license metadata file
+ file string
+
+ // target contains the parsed metadata or nil if an error
+ target *TargetNode
+
+ // edges contains the parsed dependencies
+ edges []*dependencyEdge
+
+ // err is nil unless an error occurs
+ err error
+}
+
+// receiver coordinates the tasks for reading and parsing license metadata files.
+type receiver struct {
+ // lg accumulates the read metadata and becomes the final resulting LicensGraph.
+ lg *LicenseGraph
+
+ // rootFS locates the root of the file system from which to read the files.
+ rootFS fs.FS
+
+ // stderr identifies the error output writer.
+ stderr io.Writer
+
+ // task provides a fixed-size task pool to limit concurrent open files etc.
+ task chan bool
+
+ // results returns one license metadata file result at a time.
+ results chan *result
+
+ // wg detects when done
+ wg sync.WaitGroup
+}
+
+// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
+//
+// `files` become the root files of the graph for top-down walks of the graph.
+func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
+ if len(files) == 0 {
+ return nil, fmt.Errorf("no license metadata to analyze")
+ }
+ if ConcurrentReaders < 1 {
+ return nil, fmt.Errorf("need at least one task in pool")
+ }
+
+ lg := newLicenseGraph()
+ for _, f := range files {
+ if strings.HasSuffix(f, ".meta_lic") {
+ lg.rootFiles = append(lg.rootFiles, f)
+ } else {
+ lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
+ }
+ }
+
+ recv := &receiver{
+ lg: lg,
+ rootFS: rootFS,
+ stderr: stderr,
+ task: make(chan bool, ConcurrentReaders),
+ results: make(chan *result, ConcurrentReaders),
+ wg: sync.WaitGroup{},
+ }
+ for i := 0; i < ConcurrentReaders; i++ {
+ recv.task <- true
+ }
+
+ readFiles := func() {
+ lg.mu.Lock()
+ // identify the metadata files to schedule reading tasks for
+ for _, f := range lg.rootFiles {
+ lg.targets[f] = nil
+ }
+ lg.mu.Unlock()
+
+ // schedule tasks to read the files
+ for _, f := range lg.rootFiles {
+ readFile(recv, f)
+ }
+
+ // schedule a task to wait until finished and close the channel.
+ go func() {
+ recv.wg.Wait()
+ close(recv.task)
+ close(recv.results)
+ }()
+ }
+ go readFiles()
+
+ // tasks to read license metadata files are scheduled; read and process results from channel
+ var err error
+ for recv.results != nil {
+ select {
+ case r, ok := <-recv.results:
+ if ok {
+ // handle errors by nil'ing ls, setting err, and clobbering results channel
+ if r.err != nil {
+ err = r.err
+ fmt.Fprintf(recv.stderr, "%s\n", err.Error())
+ lg = nil
+ recv.results = nil
+ continue
+ }
+
+ // record the parsed metadata (guarded by mutex)
+ recv.lg.mu.Lock()
+ recv.lg.targets[r.file] = r.target
+ if len(r.edges) > 0 {
+ recv.lg.edges = append(recv.lg.edges, r.edges...)
+ }
+ recv.lg.mu.Unlock()
+ } else {
+ // finished -- nil the results channel
+ recv.results = nil
+ }
+ }
+ }
+
+ return lg, err
+
+}
+
+// targetNode contains the license metadata for a node in the license graph.
+type targetNode struct {
+ proto license_metadata_proto.LicenseMetadata
+
+ // name is the path to the metadata file
+ name string
+}
+
+// dependencyEdge describes a single edge in the license graph.
+type dependencyEdge struct {
+ // target identifies the target node being built and/or installed.
+ target string
+
+ // dependency identifies the target node being depended on.
+ //
+ // i.e. `dependency` is necessary to build `target`.
+ dependency string
+
+ // annotations are a set of text attributes attached to the edge.
+ //
+ // Policy prescribes meaning to a limited set of annotations; others
+ // are preserved and ignored.
+ annotations TargetEdgeAnnotations
+}
+
+// addDependencies converts the proto AnnotatedDependencies into `edges`
+func addDependencies(edges *[]*dependencyEdge, target string, dependencies []*license_metadata_proto.AnnotatedDependency) error {
+ for _, ad := range dependencies {
+ dependency := ad.GetFile()
+ if len(dependency) == 0 {
+ return fmt.Errorf("missing dependency name")
+ }
+ annotations := newEdgeAnnotations()
+ for _, a := range ad.Annotations {
+ if len(a) == 0 {
+ continue
+ }
+ annotations.annotations[a] = true
+ }
+ *edges = append(*edges, &dependencyEdge{target, dependency, annotations})
+ }
+ return nil
+}
+
+// readFile is a task to read and parse a single license metadata file, and to schedule
+// additional tasks for reading and parsing dependencies as necessary.
+func readFile(recv *receiver, file string) {
+ recv.wg.Add(1)
+ <-recv.task
+ go func() {
+ f, err := recv.rootFS.Open(file)
+ if err != nil {
+ recv.results <- &result{file, nil, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
+ return
+ }
+
+ // read the file
+ data, err := io.ReadAll(f)
+ if err != nil {
+ recv.results <- &result{file, nil, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
+ return
+ }
+
+ tn := &TargetNode{name: file}
+
+ err = prototext.Unmarshal(data, &tn.proto)
+ if err != nil {
+ recv.results <- &result{file, nil, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
+ return
+ }
+
+ edges := []*dependencyEdge{}
+ err = addDependencies(&edges, file, tn.proto.Deps)
+ if err != nil {
+ recv.results <- &result{file, nil, nil, fmt.Errorf("error license metadata dependency %q: %w", file, err)}
+ return
+ }
+ tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
+
+ // send result for this file and release task before scheduling dependencies,
+ // but do not signal done to WaitGroup until dependencies are scheduled.
+ recv.results <- &result{file, tn, edges, nil}
+ recv.task <- true
+
+ // schedule tasks as necessary to read dependencies
+ for _, e := range edges {
+ // decide, signal and record whether to schedule task in critical section
+ recv.lg.mu.Lock()
+ _, alreadyScheduled := recv.lg.targets[e.dependency]
+ if !alreadyScheduled {
+ recv.lg.targets[e.dependency] = nil
+ }
+ recv.lg.mu.Unlock()
+ // schedule task to read dependency file outside critical section
+ if !alreadyScheduled {
+ readFile(recv, e.dependency)
+ }
+ }
+
+ // signal task done after scheduling dependencies
+ recv.wg.Done()
+ }()
+}