blob: a413ebef3126623de82a571b74a66ce6ff2d6b79 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package compliance
16
17import (
18 "fmt"
19 "io"
20 "io/fs"
Bob Badourc778e4c2022-03-22 13:05:19 -070021 "os"
Bob Badoura99ac622021-10-25 16:21:00 -070022 "strings"
23 "sync"
24
25 "android/soong/compliance/license_metadata_proto"
26
27 "google.golang.org/protobuf/encoding/prototext"
28)
29
// ConcurrentReaders is the size of the task pool used to limit resource usage
// (e.g. the number of open files) while reading license metadata.
//
// ReadLicenseGraph rejects values below 1.
var ConcurrentReaders = 5
34
// globalFS implements fs.FS and fs.StatFS by handing every name straight to
// the os package, without rooting it under any particular directory.
type globalFS struct{}

// Compile-time checks that globalFS satisfies the interfaces it is used as.
var _ fs.FS = globalFS{}
var _ fs.StatFS = globalFS{}

// Open opens the named file using os.Open.
//
// On failure it returns a literal nil fs.File together with the error. Returning
// the results of os.Open directly would wrap the nil *os.File in a non-nil
// fs.File interface value, which misleads any caller that checks the file
// against nil.
func (s globalFS) Open(name string) (fs.File, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	return f, nil
}

// Stat returns the fs.FileInfo that os.Stat reports for the named file.
func (s globalFS) Stat(name string) (fs.FileInfo, error) {
	return os.Stat(name)
}

// FS is a ready-to-use globalFS value.
var FS globalFS
49
// GetFS returns a filesystem rooted at `outDir`, or rooted at the current
// directory (".") when `outDir` is empty.
func GetFS(outDir string) fs.FS {
	root := "."
	if outDir != "" {
		root = outDir
	}
	return os.DirFS(root)
}
57
Bob Badoura99ac622021-10-25 16:21:00 -070058// result describes the outcome of reading and parsing a single license metadata file.
59type result struct {
60 // file identifies the path to the license metadata file
61 file string
62
63 // target contains the parsed metadata or nil if an error
64 target *TargetNode
65
Bob Badoura99ac622021-10-25 16:21:00 -070066 // err is nil unless an error occurs
67 err error
68}
69
70// receiver coordinates the tasks for reading and parsing license metadata files.
71type receiver struct {
Bob Badour103eb0f2022-01-10 13:50:57 -080072 // lg accumulates the read metadata and becomes the final resulting LicenseGraph.
Bob Badoura99ac622021-10-25 16:21:00 -070073 lg *LicenseGraph
74
75 // rootFS locates the root of the file system from which to read the files.
76 rootFS fs.FS
77
78 // stderr identifies the error output writer.
79 stderr io.Writer
80
81 // task provides a fixed-size task pool to limit concurrent open files etc.
82 task chan bool
83
84 // results returns one license metadata file result at a time.
85 results chan *result
86
87 // wg detects when done
88 wg sync.WaitGroup
89}
90
91// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
92//
93// `files` become the root files of the graph for top-down walks of the graph.
94func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
95 if len(files) == 0 {
96 return nil, fmt.Errorf("no license metadata to analyze")
97 }
98 if ConcurrentReaders < 1 {
99 return nil, fmt.Errorf("need at least one task in pool")
100 }
101
102 lg := newLicenseGraph()
103 for _, f := range files {
Bob Badour63a281c2022-01-10 17:59:14 -0800104 if strings.HasSuffix(f, "meta_lic") {
Bob Badoura99ac622021-10-25 16:21:00 -0700105 lg.rootFiles = append(lg.rootFiles, f)
106 } else {
107 lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
108 }
109 }
110
111 recv := &receiver{
112 lg: lg,
113 rootFS: rootFS,
114 stderr: stderr,
115 task: make(chan bool, ConcurrentReaders),
116 results: make(chan *result, ConcurrentReaders),
117 wg: sync.WaitGroup{},
118 }
119 for i := 0; i < ConcurrentReaders; i++ {
120 recv.task <- true
121 }
122
123 readFiles := func() {
124 lg.mu.Lock()
125 // identify the metadata files to schedule reading tasks for
126 for _, f := range lg.rootFiles {
127 lg.targets[f] = nil
128 }
129 lg.mu.Unlock()
130
131 // schedule tasks to read the files
132 for _, f := range lg.rootFiles {
133 readFile(recv, f)
134 }
135
136 // schedule a task to wait until finished and close the channel.
137 go func() {
138 recv.wg.Wait()
139 close(recv.task)
140 close(recv.results)
141 }()
142 }
143 go readFiles()
144
145 // tasks to read license metadata files are scheduled; read and process results from channel
146 var err error
147 for recv.results != nil {
148 select {
149 case r, ok := <-recv.results:
150 if ok {
151 // handle errors by nil'ing ls, setting err, and clobbering results channel
152 if r.err != nil {
153 err = r.err
154 fmt.Fprintf(recv.stderr, "%s\n", err.Error())
155 lg = nil
156 recv.results = nil
157 continue
158 }
159
160 // record the parsed metadata (guarded by mutex)
161 recv.lg.mu.Lock()
Bob Badour103eb0f2022-01-10 13:50:57 -0800162 lg.targets[r.target.name] = r.target
Bob Badoura99ac622021-10-25 16:21:00 -0700163 recv.lg.mu.Unlock()
164 } else {
165 // finished -- nil the results channel
166 recv.results = nil
167 }
168 }
169 }
170
Bob Badour103eb0f2022-01-10 13:50:57 -0800171 if lg != nil {
172 esize := 0
173 for _, tn := range lg.targets {
174 esize += len(tn.proto.Deps)
175 }
176 lg.edges = make(TargetEdgeList, 0, esize)
177 for _, tn := range lg.targets {
Bob Badoura6ee6d52022-12-16 13:50:41 -0800178 tn.licenseConditions = LicenseConditionSetFromNames(tn.proto.LicenseConditions...)
Bob Badour103eb0f2022-01-10 13:50:57 -0800179 err = addDependencies(lg, tn)
180 if err != nil {
181 return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err)
182 }
183 tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
184 }
185 }
Bob Badoura99ac622021-10-25 16:21:00 -0700186 return lg, err
187
188}
189
190// targetNode contains the license metadata for a node in the license graph.
191type targetNode struct {
192 proto license_metadata_proto.LicenseMetadata
193
Bob Badour103eb0f2022-01-10 13:50:57 -0800194 // name is the path to the metadata file.
Bob Badoura99ac622021-10-25 16:21:00 -0700195 name string
Bob Badoura99ac622021-10-25 16:21:00 -0700196
Bob Badour103eb0f2022-01-10 13:50:57 -0800197 // lg is the license graph the node belongs to.
198 lg *LicenseGraph
Bob Badoura99ac622021-10-25 16:21:00 -0700199
Bob Badour103eb0f2022-01-10 13:50:57 -0800200 // edges identifies the dependencies of the target.
201 edges TargetEdgeList
Bob Badoura99ac622021-10-25 16:21:00 -0700202
Bob Badour103eb0f2022-01-10 13:50:57 -0800203 // licenseConditions identifies the set of license conditions originating at the target node.
204 licenseConditions LicenseConditionSet
205
206 // resolution identifies the set of conditions resolved by acting on the target node.
207 resolution LicenseConditionSet
Bob Badour085a2c22022-09-21 19:36:59 -0700208
209 // pure indicates whether to treat the node as a pure aggregate (no internal linkage)
210 pure bool
Bob Badoura99ac622021-10-25 16:21:00 -0700211}
212
213// addDependencies converts the proto AnnotatedDependencies into `edges`
Bob Badour103eb0f2022-01-10 13:50:57 -0800214func addDependencies(lg *LicenseGraph, tn *TargetNode) error {
Colin Cross35f79c32022-01-27 15:18:52 -0800215 tn.edges = make(TargetEdgeList, 0, len(tn.proto.Deps))
Bob Badour103eb0f2022-01-10 13:50:57 -0800216 for _, ad := range tn.proto.Deps {
Bob Badoura99ac622021-10-25 16:21:00 -0700217 dependency := ad.GetFile()
218 if len(dependency) == 0 {
219 return fmt.Errorf("missing dependency name")
220 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800221 dtn, ok := lg.targets[dependency]
222 if !ok {
223 return fmt.Errorf("unknown dependency name %q", dependency)
224 }
225 if dtn == nil {
226 return fmt.Errorf("nil dependency for name %q", dependency)
227 }
Bob Badoura99ac622021-10-25 16:21:00 -0700228 annotations := newEdgeAnnotations()
229 for _, a := range ad.Annotations {
Bob Badour67d8ae32022-01-10 18:32:54 -0800230 // look up a common constant annotation string from a small map
231 // instead of creating 1000's of copies of the same 3 strings.
232 if ann, ok := RecognizedAnnotations[a]; ok {
Bob Badour5446a6f2022-01-10 18:44:59 -0800233 annotations.annotations[ann] = struct{}{}
Bob Badoura99ac622021-10-25 16:21:00 -0700234 }
Bob Badoura99ac622021-10-25 16:21:00 -0700235 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800236 edge := &TargetEdge{tn, dtn, annotations}
237 lg.edges = append(lg.edges, edge)
238 tn.edges = append(tn.edges, edge)
Bob Badoura99ac622021-10-25 16:21:00 -0700239 }
240 return nil
241}
242
243// readFile is a task to read and parse a single license metadata file, and to schedule
244// additional tasks for reading and parsing dependencies as necessary.
245func readFile(recv *receiver, file string) {
246 recv.wg.Add(1)
247 <-recv.task
248 go func() {
249 f, err := recv.rootFS.Open(file)
250 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800251 recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700252 return
253 }
254
255 // read the file
256 data, err := io.ReadAll(f)
257 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800258 recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700259 return
260 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800261 f.Close()
Bob Badoura99ac622021-10-25 16:21:00 -0700262
Bob Badour103eb0f2022-01-10 13:50:57 -0800263 tn := &TargetNode{lg: recv.lg, name: file}
Bob Badoura99ac622021-10-25 16:21:00 -0700264
265 err = prototext.Unmarshal(data, &tn.proto)
266 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800267 recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700268 return
269 }
270
Bob Badoura99ac622021-10-25 16:21:00 -0700271 // send result for this file and release task before scheduling dependencies,
272 // but do not signal done to WaitGroup until dependencies are scheduled.
Bob Badour103eb0f2022-01-10 13:50:57 -0800273 recv.results <- &result{file, tn, nil}
Bob Badoura99ac622021-10-25 16:21:00 -0700274 recv.task <- true
275
276 // schedule tasks as necessary to read dependencies
Bob Badour103eb0f2022-01-10 13:50:57 -0800277 for _, ad := range tn.proto.Deps {
278 dependency := ad.GetFile()
Bob Badoura99ac622021-10-25 16:21:00 -0700279 // decide, signal and record whether to schedule task in critical section
280 recv.lg.mu.Lock()
Bob Badour103eb0f2022-01-10 13:50:57 -0800281 _, alreadyScheduled := recv.lg.targets[dependency]
Bob Badoura99ac622021-10-25 16:21:00 -0700282 if !alreadyScheduled {
Bob Badour103eb0f2022-01-10 13:50:57 -0800283 recv.lg.targets[dependency] = nil
Bob Badoura99ac622021-10-25 16:21:00 -0700284 }
285 recv.lg.mu.Unlock()
286 // schedule task to read dependency file outside critical section
287 if !alreadyScheduled {
Bob Badour103eb0f2022-01-10 13:50:57 -0800288 readFile(recv, dependency)
Bob Badoura99ac622021-10-25 16:21:00 -0700289 }
290 }
291
292 // signal task done after scheduling dependencies
293 recv.wg.Done()
294 }()
295}