blob: a203258b966cef99d6364c6012d68b9f2f1bd0db [file] [log] [blame]
Wei Lia3265ef2024-02-05 14:49:50 -08001# !/usr/bin/env python3
2#
3# Copyright (C) 2024 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
Generate the SBOM of the current target product in SPDX format.
Usage example:
  gen_sbom.py --output_file out/soong/sbom/aosp_cf_x86_64_phone/sbom.spdx \
              --metadata out/soong/metadata/aosp_cf_x86_64_phone/metadata.db \
              --product_out out/target/product/vsoc_x86_64 \
              --soong_out out/soong \
              --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \
              --product_mfr=Google
26"""
27
28import argparse
29import datetime
30import google.protobuf.text_format as text_format
31import hashlib
32import os
33import pathlib
34import queue
35import metadata_file_pb2
36import sbom_data
37import sbom_writers
38import sqlite3
39
# Package types; used as the type component of generated SPDX package IDs.
PKG_SOURCE = 'SOURCE'
PKG_UPSTREAM = 'UPSTREAM'
PKG_PREBUILT = 'PREBUILT'

# Prefix of a security tag in a METADATA file.
NVD_CPE23 = 'NVD-CPE2.3:'

# Section headings of the report; each one keys a list of entries.
ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
ISSUE_UNKNOWN_SECURITY_TAG_TYPE = 'Unknown security tag type:'
ISSUE_INSTALLED_FILE_NOT_EXIST = 'Non-existent installed files:'
ISSUE_NO_MODULE_FOUND_FOR_STATIC_DEP = 'No module found for static dependency files:'
INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
56
# Soong module types whose installed files are treated as coming from a prebuilt package.
SOONG_PREBUILT_MODULE_TYPES = [
    'android_app_import',
    'android_library_import',
    'cc_prebuilt_binary',
    'cc_prebuilt_library',
    'cc_prebuilt_library_headers',
    'cc_prebuilt_library_shared',
    'cc_prebuilt_library_static',
    'cc_prebuilt_object',
    'dex_import',
    'java_import',
    'java_sdk_library_import',
    'java_system_modules_import',
    'libclang_rt_prebuilt_library_static',
    'libclang_rt_prebuilt_library_shared',
    'llvm_prebuilt_library_static',
    'ndk_prebuilt_object',
    'ndk_prebuilt_shared_stl',
    # Was misspelled as 'nkd_prebuilt_static_stl', which never matched a module type.
    'ndk_prebuilt_static_stl',
    'prebuilt_apex',
    'prebuilt_bootclasspath_fragment',
    'prebuilt_dsp',
    'prebuilt_firmware',
    'prebuilt_kernel_modules',
    'prebuilt_rfsa',
    'prebuilt_root',
    'rust_prebuilt_dylib',
    'rust_prebuilt_library',
    'rust_prebuilt_rlib',
    'vndk_prebuilt_shared',
]
88
# Values accepted for third_party.identifier.type in a METADATA file.
THIRD_PARTY_IDENTIFIER_TYPES = [
    # Types defined in metadata_file.proto
    'Git',
    'SVN',
    'Hg',
    'Darcs',
    'VCS',
    'Archive',
    'PrebuiltByAlphabet',
    'LocalSource',
    'Other',
    # OSV ecosystems defined at https://ossf.github.io/osv-schema/#affectedpackage-field.
    'Go',
    'npm',
    'OSS-Fuzz',
    'PyPI',
    'RubyGems',
    'crates.io',
    'Hackage',
    'GHC',
    'Packagist',
    'Maven',
    'NuGet',
    'Linux',
    'Debian',
    'Alpine',
    'Hex',
    'Android',
    'GitHub Actions',
    'Pub',
    'ConanCenter',
    'Rocky Linux',
    'AlmaLinux',
    'Bitnami',
    'Photon OS',
    'CRAN',
    'Bioconductor',
    'SwiftURL',
]
128
129
def get_args():
    """Parse the command line of this script and return the resulting namespace."""
    switches = (
        (('-v', '--verbose'), 'Print more information.'),
        (('-d', '--debug'), 'Debug mode'),
    )
    required_options = (
        ('--output_file', 'The generated SBOM file in SPDX format.'),
        ('--metadata', 'The metadata DB file path.'),
        ('--product_out', 'The path of PRODUCT_OUT, e.g. out/target/product/vsoc_x86_64.'),
        ('--soong_out', 'The path of Soong output directory, e.g. out/soong'),
        ('--build_version', 'The build version.'),
        ('--product_mfr', 'The product manufacturer.'),
    )

    parser = argparse.ArgumentParser()
    for flags, description in switches:
        parser.add_argument(*flags, action='store_true', default=False, help=description)
    for option, description in required_options:
        parser.add_argument(option, required=True, help=description)
    parser.add_argument('--json', action='store_true', default=False,
                        help='Generated SBOM file in SPDX JSON format')

    return parser.parse_args()
143
144
def log(*info):
    """Print every item of info on its own line, but only when args.verbose is set."""
    if not args.verbose:
        return
    for item in info:
        print(item)
149
150
def new_package_id(package_name, type):
    """Return the SPDX ID 'SPDXRef-<type>-<encoded package name>' for a package."""
    encoded_name = sbom_data.encode_for_spdxid(package_name)
    return f'SPDXRef-{type}-{encoded_name}'
153
154
def new_file_id(file_path):
    """Return the SPDX ID 'SPDXRef-<encoded file path>' for a file."""
    encoded_path = sbom_data.encode_for_spdxid(file_path)
    return f'SPDXRef-{encoded_path}'
157
158
def new_license_id(license_name):
    """Return the SPDX ID 'LicenseRef-<encoded license name>' for a license."""
    encoded_name = sbom_data.encode_for_spdxid(license_name)
    return f'LicenseRef-{encoded_name}'
161
162
def checksum(file_path):
    """Return the SHA1 checksum of file_path formatted as 'SHA1: <hex digest>'.

    For a symbolic link the link target string (UTF-8 encoded) is hashed instead of the
    content of the file it points to. Regular files are hashed in fixed-size chunks so
    that large files are never loaded into memory as a whole.
    """
    h = hashlib.sha1()
    if os.path.islink(file_path):
        h.update(os.readlink(file_path).encode('utf-8'))
    else:
        with open(file_path, 'rb') as f:
            while chunk := f.read(1024 * 1024):
                h.update(chunk)
    return f'SHA1: {h.hexdigest()}'
171
172
def is_soong_prebuilt_module(file_metadata):
    """Tell whether the file's Soong module type is one of SOONG_PREBUILT_MODULE_TYPES.

    An empty/None module type is returned unchanged (falsy) rather than converted to False.
    """
    module_type = file_metadata['soong_module_type']
    if not module_type:
        return module_type
    return module_type in SOONG_PREBUILT_MODULE_TYPES
176
177
def is_source_package(file_metadata):
    """Return True if the file comes from a module under external/ that is not a prebuilt.

    A missing module path (empty string or None) is never a source package.
    """
    module_path = file_metadata['module_path']
    if not module_path:
        # Other helpers in this file treat module_path as possibly empty; do the same here
        # instead of failing on None.startswith().
        return False
    return module_path.startswith('external/') and not is_prebuilt_package(file_metadata)
181
182
def is_prebuilt_package(file_metadata):
    """Tell whether the installed file comes from a prebuilt package.

    With a module path, the file is prebuilt when the path is under prebuilts/, when its
    Soong module type is a prebuilt type, or when is_prebuilt_make_module is set. Without
    a module path, it is prebuilt when kernel_module_copy_files is set and does not start
    with 'ANDROID-GEN:'.
    """
    module_path = file_metadata['module_path']
    if not module_path:
        copy_files = file_metadata['kernel_module_copy_files']
        return bool(copy_files) and not copy_files.startswith('ANDROID-GEN:')

    if module_path.startswith('prebuilts/'):
        return True
    soong_prebuilt = is_soong_prebuilt_module(file_metadata)
    if soong_prebuilt:
        return soong_prebuilt
    return file_metadata['is_prebuilt_make_module']
195
196
def get_source_package_info(file_metadata, metadata_file_path):
    """Return the name and security external references of a source package.

    Without a METADATA file the module path is used as name and no references are
    returned. Otherwise the name is the "name" field of the METADATA file (falling back
    to the base name of the METADATA directory) and one external reference is created for
    each security tag that holds a CPE 2.3 or CPE 2.2 identifier.

    See go/android-spdx and go/android-sbom-gen for more details.
    """
    if not metadata_file_path:
        return file_metadata['module_path'], []

    metadata_proto = metadata_file_protos[metadata_file_path]
    # Lower-cased tag prefix -> external reference type; tags are matched case-insensitively.
    cpe_ref_types = (
        ((NVD_CPE23 + 'cpe:2.3:').lower(), sbom_data.PackageExternalRefType.cpe23Type),
        ((NVD_CPE23 + 'cpe:/').lower(), sbom_data.PackageExternalRefType.cpe22Type),
    )
    external_refs = []
    for tag in metadata_proto.third_party.security.tag:
        lowered_tag = tag.lower()
        for prefix, ref_type in cpe_ref_types:
            if lowered_tag.startswith(prefix):
                # Strip the prefix by length: removeprefix() is case-sensitive and would
                # leave a differently-cased prefix in the locator.
                external_refs.append(
                    sbom_data.PackageExternalRef(
                        category=sbom_data.PackageExternalRefCategory.SECURITY,
                        type=ref_type,
                        locator=tag[len(NVD_CPE23):]))
                break

    # Use only the directory name as package name when METADATA has no "name".
    name = metadata_proto.name or os.path.basename(metadata_file_path)
    return name, external_refs
224
225
def get_prebuilt_package_name(file_metadata, metadata_file_path):
    """Return the name of a prebuilt package.

    The name is taken, in this order, from the "name" field of the METADATA file, the
    METADATA file path, the module path, or the directory of the kernel module's source
    path. A leading 'prebuilts/' is dropped and every '/' is replaced by '-'.

    See go/android-spdx and go/android-sbom-gen for more details.
    """
    module_path = file_metadata['module_path'] if not metadata_file_path else None
    if metadata_file_path:
        raw_name = metadata_file_protos[metadata_file_path].name or metadata_file_path
    elif module_path:
        raw_name = module_path
    elif file_metadata['kernel_module_copy_files']:
        # Format of kernel_module_copy_files: <source path>:<dest path>
        source_path = file_metadata['kernel_module_copy_files'].split(':')[0]
        raw_name = os.path.dirname(source_path)
    else:
        raw_name = None

    return raw_name.removeprefix('prebuilts/').replace('/', '-')
246
247
def get_metadata_file_path(file_metadata):
    """Search for the METADATA file of a package and return the directory that holds it.

    The search starts at the module path (or, if there is none, at the directory of the
    kernel module's source path) and walks up one directory at a time. An empty string is
    returned when no METADATA file is found.
    """
    metadata_path = ''
    if file_metadata['module_path']:
        metadata_path = file_metadata['module_path']
    elif file_metadata['kernel_module_copy_files']:
        # Format of kernel_module_copy_files: <source path>:<dest path>
        metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])

    while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
        parent = os.path.dirname(metadata_path)
        if parent == metadata_path:
            # Reached a filesystem root: dirname() no longer shortens the path, so stop
            # here instead of looping forever.
            return ''
        metadata_path = parent

    return metadata_path
260
261
def get_package_version(metadata_file_path):
    """Return third_party.version of the package's METADATA file, or None without one."""
    if metadata_file_path:
        return metadata_file_protos[metadata_file_path].third_party.version
    return None
268
269
def get_package_homepage(metadata_file_path):
    """Return the homepage URL from a package's METADATA file, or None if there is none.

    third_party.homepage wins; otherwise the first third_party.url entry of type HOMEPAGE
    is used.
    """
    if not metadata_file_path:
        return None

    third_party = metadata_file_protos[metadata_file_path].third_party
    if third_party.homepage:
        return third_party.homepage

    homepage_urls = (url.value for url in third_party.url
                     if url.type == metadata_file_pb2.URL.Type.HOMEPAGE)
    return next(homepage_urls, None)
282
283
def get_package_download_location(metadata_file_path):
    """Return a package's code repository URL from its METADATA file, or None.

    The third_party.url entries are sorted by type. The first entry is returned unless it
    is of type HOMEPAGE, in which case the second entry (if any) is returned.
    """
    if not metadata_file_path:
        return None

    declared_urls = metadata_file_protos[metadata_file_path].third_party.url
    if not declared_urls:
        return None

    ordered = sorted(declared_urls, key=lambda url: url.type)
    first = ordered[0]
    if first.type != metadata_file_pb2.URL.Type.HOMEPAGE:
        return first.value
    # NOTE(review): the second entry is returned without checking its type — confirm that
    # at most one HOMEPAGE entry is expected.
    if len(ordered) > 1:
        return ordered[1].value
    return None
297
298
def get_license_text(license_files):
    """Return the concatenated text of the given license files.

    In debug mode the files are not read; a '#### Content from <file>' line is emitted for
    each file instead. Otherwise each file's content is followed by an empty line, and
    undecodable bytes are replaced.
    """
    if args.debug:
        pieces = ['#### Content from ' + license_file + '\n' for license_file in license_files]
    else:
        pieces = [pathlib.Path(license_file).read_text(errors='replace') + '\n\n'
                  for license_file in license_files]
    return ''.join(pieces)
307
308
def _get_declared_licenses(module_path, metadata_file_path):
    """Return sbom_data.License objects for the default licenses of a package.

    The licenses of the package at module_path are used; if there are none, the licenses
    of the package at metadata_file_path are used. Licenses without license files are
    skipped. License texts are cached in licenses_text.
    """
    lics = db.get_package_licenses(module_path)
    if not lics:
        lics = db.get_package_licenses(metadata_file_path)
    if not lics:
        return []

    licenses = []
    for license_name, license_files in lics.items():
        if not license_files:
            continue
        if license_name not in licenses_text:
            licenses_text[license_name] = get_license_text(license_files.split(' '))
        licenses.append(sbom_data.License(id=new_license_id(license_name),
                                          name=license_name,
                                          text=licenses_text[license_name]))
    return licenses


def get_sbom_fragments(installed_file_metadata, metadata_file_path):
    """Return the SPDX fragment of the source/prebuilt package of an installed file.

    The fragment usually contains a SOURCE/PREBUILT package, an UPSTREAM package and, if
    sbom_ref is defined in the METADATA file, an external SBOM document reference.

    Returns a tuple (external_doc_ref, packages, relationships, licenses). external_doc_ref
    is None when there is no usable sbom_ref. packages and relationships are empty when
    the file is neither from a source package nor from a prebuilt package; otherwise the
    first package is the SOURCE/PREBUILT package and the second the UPSTREAM package.

    See go/android-spdx and go/android-sbom-gen for more details.
    """
    external_doc_ref = None
    packages = []
    relationships = []

    # Info from METADATA file
    homepage = get_package_homepage(metadata_file_path)
    version = get_package_version(metadata_file_path)
    download_location = get_package_download_location(metadata_file_path)

    licenses = _get_declared_licenses(installed_file_metadata['module_path'], metadata_file_path)

    if is_source_package(installed_file_metadata):
        # Source fork packages
        name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
        fork_package = sbom_data.Package(id=new_package_id(name, PKG_SOURCE),
                                         name=name,
                                         version=args.build_version,
                                         download_location=sbom_data.VALUE_NONE,
                                         supplier='Organization: ' + args.product_mfr,
                                         external_refs=external_refs)
    elif is_prebuilt_package(installed_file_metadata):
        # Prebuilt fork packages
        name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
        fork_package = sbom_data.Package(id=new_package_id(name, PKG_PREBUILT),
                                         name=name,
                                         download_location=sbom_data.VALUE_NONE,
                                         version=version if version else args.build_version,
                                         supplier='Organization: ' + args.product_mfr)
    else:
        # Neither a source nor a prebuilt package: there is no package to describe and no
        # upstream package an sbom_ref could be attached to.
        return external_doc_ref, packages, relationships, licenses

    upstream_package = sbom_data.Package(
        id=new_package_id(name, PKG_UPSTREAM),
        name=name,
        version=version,
        supplier=('Organization: ' + homepage) if homepage else sbom_data.VALUE_NOASSERTION,
        download_location=download_location)
    packages += [fork_package, upstream_package]
    relationships.append(sbom_data.Relationship(id1=fork_package.id,
                                                relationship=sbom_data.RelationshipType.VARIANT_OF,
                                                id2=upstream_package.id))
    for license in licenses:
        fork_package.declared_license_ids.append(license.id)
        upstream_package.declared_license_ids.append(license.id)

    if metadata_file_path:
        third_party = metadata_file_protos[metadata_file_path].third_party
        if third_party.WhichOneof('sbom') == 'sbom_ref':
            sbom_ref = third_party.sbom_ref
            # The reference is only usable when URL, checksum and element ID are all set.
            if sbom_ref.url and sbom_ref.checksum and sbom_ref.element_id:
                doc_ref_id = f'DocumentRef-{PKG_UPSTREAM}-{sbom_data.encode_for_spdxid(name)}'
                external_doc_ref = sbom_data.DocumentExternalReference(id=doc_ref_id,
                                                                       uri=sbom_ref.url,
                                                                       checksum=sbom_ref.checksum)
                relationships.append(
                    sbom_data.Relationship(id1=upstream_package.id,
                                           relationship=sbom_data.RelationshipType.VARIANT_OF,
                                           id2=doc_ref_id + ':' + sbom_ref.element_id))

    return external_doc_ref, packages, relationships, licenses
402
403
def save_report(report_file_path, report):
    """Write the report to report_file_path as UTF-8 text.

    Each report section is written as its heading line, one tab-indented line per entry,
    and a trailing empty line.
    """
    with open(report_file_path, 'w', encoding='utf-8') as report_file:
        for heading, entries in report.items():
            report_file.write(heading + '\n')
            report_file.writelines('\t' + entry + '\n' for entry in entries)
            report_file.write('\n')
411
412
def installed_file_has_metadata(installed_file_metadata, report):
    """Validate the metadata generated by Make for an installed file.

    Returns True if the file has a module path, product_copy_files,
    kernel_module_copy_files, is platform generated, or is a '.fsv_meta' file. Otherwise
    the file is added to report[ISSUE_NO_METADATA] and False is returned.
    """
    installed_file = installed_file_metadata['installed_file']
    origin_fields = (
        installed_file_metadata['module_path'],
        installed_file_metadata['product_copy_files'],
        installed_file_metadata['kernel_module_copy_files'],
        installed_file_metadata['is_platform_generated'],
    )

    if any(origin_fields) or installed_file.endswith('.fsv_meta'):
        return True

    report[ISSUE_NO_METADATA].append(installed_file)
    return False
430
431
def validate_package_metadata(metadata_file_path, package_metadata):
    """Validate the identifiers in a package's METADATA and exit the script on violation.

    1) Only known identifier types (THIRD_PARTY_IDENTIFIER_TYPES) are allowed.
    2) Only one identifier's primary_source can be true.

    Raises:
      SystemExit: with an explanatory message when one of the rules is violated.
    """
    # sys is not imported at file level; import it here so sys.exit() is available.
    import sys

    primary_source_found = False
    for identifier in package_metadata.third_party.identifier:
        if identifier.type not in THIRD_PARTY_IDENTIFIER_TYPES:
            sys.exit(f'Unknown value of third_party.identifier.type in {metadata_file_path}/METADATA: {identifier.type}.')
        if identifier.primary_source:
            if primary_source_found:
                sys.exit(
                    f'Field "primary_source" is set to true in multiple third_party.identifier in {metadata_file_path}/METADATA.')
            # Remember the primary source for the rest of the loop; a later non-primary
            # identifier must not reset this flag.
            primary_source_found = True
444
445
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
    """Record METADATA file information of an installed file in the report.

    Without a METADATA file an entry is added to report[ISSUE_NO_METADATA_FILE]. Otherwise
    an entry is added to report[INFO_METADATA_FOUND_FOR_PACKAGE] and, the first time a
    METADATA file is seen, it is parsed, validated, cached in metadata_file_protos and
    checked for a missing name, a missing third_party.version and unknown security tags.
    """
    if not metadata_file_path:
        report[ISSUE_NO_METADATA_FILE].append(
            "installed_file: {}, module_path: {}".format(
                installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
        return

    report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
        'installed_file: {}, module_path: {}, METADATA file: {}'.format(
            installed_file_metadata['installed_file'],
            installed_file_metadata['module_path'],
            metadata_file_path + '/METADATA'))

    if metadata_file_path in metadata_file_protos:
        # Already parsed, validated and reported for another installed file.
        return

    package_metadata = metadata_file_pb2.Metadata()
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(metadata_file_path + '/METADATA', 'rt', encoding='utf-8') as f:
        text_format.Parse(f.read(), package_metadata)

    validate_package_metadata(metadata_file_path, package_metadata)
    metadata_file_protos[metadata_file_path] = package_metadata

    if not package_metadata.name:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(f'{metadata_file_path}/METADATA does not has "name"')

    if not package_metadata.third_party.version:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(
            f'{metadata_file_path}/METADATA does not has "third_party.version"')

    for tag in package_metadata.third_party.security.tag:
        if not tag.startswith(NVD_CPE23):
            report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
                f'Unknown security tag type: {tag} in {metadata_file_path}/METADATA')
477
478
def add_package_of_file(file_id, file_metadata, doc, report):
    """Add the package information of a file to the SBOM document.

    Used for files from a source fork or prebuilt fork package: the package's external
    document reference, packages, relationships and licenses are added to doc, together
    with a GENERATED_FROM relationship from the file to the fork package.
    """
    metadata_file_path = get_metadata_file_path(file_metadata)
    report_metadata_file(metadata_file_path, file_metadata, report)

    external_doc_ref, pkgs, rels, licenses = get_sbom_fragments(file_metadata, metadata_file_path)
    if not pkgs:
        return

    if external_doc_ref:
        doc.add_external_ref(external_doc_ref)
    for package in pkgs:
        doc.add_package(package)
    for relationship in rels:
        doc.add_relationship(relationship)

    # The first package should be the source/prebuilt fork package
    generated_from = sbom_data.Relationship(id1=file_id,
                                            relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                            id2=pkgs[0].id)
    doc.add_relationship(generated_from)

    # NOTE(review): licenses are added only when packages were returned — confirm against
    # the upstream file.
    for license in licenses:
        doc.add_license(license)
498
499
def add_static_deps_of_file(file_id, file_metadata, doc):
    """Add a STATIC_LINK relationship to doc for each static dependency of a file.

    Both static_dep_files and whole_static_dep_files (space separated) are considered.
    """
    static_deps = file_metadata['static_dep_files']
    whole_static_deps = file_metadata['whole_static_dep_files']
    if not (static_deps or whole_static_deps):
        return

    dep_files = []
    for deps in (static_deps, whole_static_deps):
        if deps:
            dep_files.extend(deps.split(' '))

    # Static libs are not shipped on devices, so names are derived from .intermediates paths.
    intermediates_prefix = args.soong_out + '/.intermediates/'
    for dep_file in dep_files:
        dep_file_id = new_file_id(dep_file.removeprefix(intermediates_prefix))
        doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                    relationship=sbom_data.RelationshipType.STATIC_LINK,
                                                    id2=dep_file_id))
516
517
def add_licenses_of_file(file_id, file_metadata, doc):
    """Add the licenses of a file's module to the file and to the SBOM document.

    Each license with license files is appended to the concluded licenses of the file with
    ID file_id and added to doc. License texts are cached in licenses_text.
    """
    lics = db.get_module_licenses(file_metadata.get('name', ''), file_metadata['module_path'])
    if not lics:
        return

    file = next(f for f in doc.files if file_id == f.id)
    for license_name, license_files in lics.items():
        if not license_files:
            continue
        license_id = new_license_id(license_name)
        file.concluded_license_ids.append(license_id)
        if license_name not in licenses_text:
            licenses_text[license_name] = get_license_text(license_files.split(' '))

        doc.add_license(sbom_data.License(id=license_id,
                                          name=license_name,
                                          text=licenses_text[license_name]))
532
533
def get_all_transitive_static_dep_files_of_installed_files(installed_files_metadata, db, report):
    """Return the sorted list of all transitive static dep files of all installed files.

    Starting from static_dep_files and whole_static_dep_files (space separated) of every
    installed file, the Soong module of each dep file is looked up in db and its own static
    deps are followed. Dep files without a Soong module are added to
    report[ISSUE_NO_MODULE_FOUND_FOR_STATIC_DEP]; they are still part of the result.
    """
    pending = queue.Queue()
    processed = set()

    def enqueue(dep_files):
        # dep_files is a space separated list; it may be empty or None.
        if dep_files:
            for dep in dep_files.split(' '):
                if dep not in processed:
                    pending.put(dep)

    for installed_file_metadata in installed_files_metadata:
        enqueue(installed_file_metadata['static_dep_files'])
        enqueue(installed_file_metadata['whole_static_dep_files'])

    while not pending.empty():
        dep_file = pending.get()
        if dep_file in processed:
            # It has been processed
            continue

        processed.add(dep_file)
        soong_module = db.get_soong_module_of_built_file(dep_file)
        if not soong_module:
            # This should not happen; report the dep file that has no module.
            report[ISSUE_NO_MODULE_FOUND_FOR_STATIC_DEP].append(dep_file)
            continue

        enqueue(soong_module['static_dep_files'])
        enqueue(soong_module['whole_static_dep_files'])

    return sorted(processed)
569
570
class MetadataDb:
    """In-memory copy of the compliance metadata DB plus the queries used to build the SBOM."""

    def __init__(self, db):
        """Copy the SQLite database file at path db into memory and reorganize it."""
        # ':memory:' (with the trailing colon) is the in-memory database; ':memory' would
        # create an on-disk file of that name in the current directory.
        self.conn = sqlite3.connect(':memory:')
        self.conn.row_factory = sqlite3.Row
        source = sqlite3.connect(db)
        try:
            source.backup(self.conn)
        finally:
            # A sqlite3 connection used as context manager is not closed on exit, so close
            # it explicitly.
            source.close()
        self.reorg()

    def _fetch_all(self, sql, params=()):
        """Run a query and return all of its rows; the cursor is always closed."""
        cursor = self.conn.execute(sql, params)
        try:
            return cursor.fetchall()
        finally:
            cursor.close()

    def _split_multi_value_rows(self, table, columns, multi_value_column):
        """Replace rows whose multi_value_column holds several values by one row per value.

        columns lists all columns of table in table order. Only internal, constant table
        and column names are passed in, so they are formatted into the SQL directly.
        """
        column_list = ', '.join(columns)
        multi_value_rows = self._fetch_all(
            f"select {column_list} from {table} where {multi_value_column} like '% %'")
        rows = []
        for row in multi_value_rows:
            # split() without argument also drops empty items caused by repeated spaces.
            for value in row[multi_value_column].split():
                rows.append(tuple(value if column == multi_value_column else row[column]
                                  for column in columns))
        placeholders = ', '.join('?' * len(columns))
        self.conn.executemany(f'insert into {table} values ({placeholders})', rows)
        self.conn.execute(f"delete from {table} where {multi_value_column} like '% %'")
        self.conn.commit()

    def reorg(self):
        """Create the helper tables with one value per row, and the indexes for the queries.

        In debug mode the reorganized database is also saved as compliance-metadata-debug.db
        in the directory of the metadata DB file.
        """
        # package_license table
        self.conn.execute("create table package_license as "
                          "select name as package, pkg_default_applicable_licenses as license "
                          "from modules "
                          "where module_type = 'package' ")
        self._split_multi_value_rows('package_license', ('package', 'license'), 'license')

        # module_license table
        self.conn.execute("create table module_license as "
                          "select distinct name as module, package, licenses as license "
                          "from modules "
                          "where licenses != '' ")
        self._split_multi_value_rows('module_license', ('module', 'package', 'license'), 'license')

        # module_installed_file table
        self.conn.execute("create table module_installed_file as "
                          "select id as module_id, name as module_name, package, installed_files as installed_file "
                          "from modules "
                          "where installed_files != '' ")
        self._split_multi_value_rows('module_installed_file',
                                     ('module_id', 'module_name', 'package', 'installed_file'),
                                     'installed_file')

        # module_built_file table
        self.conn.execute("create table module_built_file as "
                          "select id as module_id, name as module_name, package, built_files as built_file "
                          "from modules "
                          "where built_files != '' ")
        self._split_multi_value_rows('module_built_file',
                                     ('module_id', 'module_name', 'package', 'built_file'),
                                     'built_file')

        # Indexes
        self.conn.execute('create index idx_modules_id on modules (id)')
        self.conn.execute('create index idx_modules_name on modules (name)')
        self.conn.execute('create index idx_package_licnese_package on package_license (package)')
        self.conn.execute('create index idx_package_licnese_license on package_license (license)')
        self.conn.execute('create index idx_module_licnese_module on module_license (module)')
        self.conn.execute('create index idx_module_licnese_license on module_license (license)')
        self.conn.execute('create index idx_module_installed_file_module_id on module_installed_file (module_id)')
        self.conn.execute('create index idx_module_installed_file_installed_file on module_installed_file (installed_file)')
        self.conn.execute('create index idx_module_built_file_module_id on module_built_file (module_id)')
        self.conn.execute('create index idx_module_built_file_built_file on module_built_file (built_file)')
        self.conn.commit()

        if args.debug:
            # os.path.join keeps the file next to the metadata DB even when the DB path has
            # no directory part.
            debug_db = sqlite3.connect(
                os.path.join(os.path.dirname(args.metadata), 'compliance-metadata-debug.db'))
            try:
                self.conn.backup(debug_db)
            finally:
                debug_db.close()

    def get_installed_files(self):
        """Return all records of table make_metadata as a list of dicts.

        The table contains all installed files and the corresponding make modules' metadata.
        """
        rows = self._fetch_all('select installed_file, module_path, is_prebuilt_make_module, product_copy_files, kernel_module_copy_files, is_platform_generated, license_text from make_metadata')
        return [dict(row) for row in rows]

    def get_soong_modules(self):
        """Return the metadata of all Soong modules (table modules) as a list of dicts."""
        rows = self._fetch_all('select name, package, package as module_path, module_type as soong_module_type, built_files, installed_files, static_dep_files, whole_static_dep_files from modules')
        return [dict(row) for row in rows]

    def get_package_licenses(self, package):
        """Return the default licenses of a package as {license name: license text files}.

        package is the package path without the leading '//'.
        """
        rows = self._fetch_all('select m.name, m.package, m.lic_license_text as license_text '
                               'from package_license pl join modules m on pl.license = m.name '
                               'where pl.package = ?',
                               ('//' + package,))
        return {r['name']: r['license_text'] for r in rows}

    def get_module_licenses(self, module_name, package):
        """Return the licenses of a module as {license name: license text files}.

        The licenses defined on the module are used; if there are none, the default
        licenses of the package are returned.
        """
        # If property "licenses" is defined on module
        rows = self._fetch_all('select m.name, m.package, m.lic_license_text as license_text '
                               'from module_license ml join modules m on ml.license = m.name '
                               'where ml.module = ? and ml.package = ?',
                               (module_name, package))
        licenses = {r['name']: r['license_text'] for r in rows}
        if licenses:
            return licenses

        # Use default package license
        return self.get_package_licenses(package)

    def get_soong_module_of_installed_file(self, installed_file):
        """Return the Soong module (as dict) that installs installed_file, or None."""
        rows = self._fetch_all('select name, m.package, m.package as module_path, module_type as soong_module_type, built_files, installed_files, static_dep_files, whole_static_dep_files '
                               'from modules m join module_installed_file mif on m.id = mif.module_id '
                               'where mif.installed_file = ?',
                               (installed_file,))
        return dict(rows[0]) if rows else None

    def get_soong_module_of_built_file(self, built_file):
        """Return the Soong module (as dict) that builds built_file, or None."""
        rows = self._fetch_all('select name, m.package, m.package as module_path, module_type as soong_module_type, built_files, installed_files, static_dep_files, whole_static_dep_files '
                               'from modules m join module_built_file mbf on m.id = mbf.module_id '
                               'where mbf.built_file = ?',
                               (built_file,))
        return dict(rows[0]) if rows else None
758
759
760def main():
761 global args
762 args = get_args()
763 log('Args:', vars(args))
764
765 global db
766 db = MetadataDb(args.metadata)
767 global metadata_file_protos
768 metadata_file_protos = {}
769 global licenses_text
770 licenses_text = {}
771
772 product_package_id = sbom_data.SPDXID_PRODUCT
773 product_package_name = sbom_data.PACKAGE_NAME_PRODUCT
774 product_package = sbom_data.Package(id=product_package_id,
775 name=product_package_name,
776 download_location=sbom_data.VALUE_NONE,
777 version=args.build_version,
778 supplier='Organization: ' + args.product_mfr,
779 files_analyzed=True)
780 doc_name = args.build_version
781 doc = sbom_data.Document(name=doc_name,
782 namespace=f'https://www.google.com/sbom/spdx/android/{doc_name}',
783 creators=['Organization: ' + args.product_mfr],
784 describes=product_package_id)
785
786 doc.packages.append(product_package)
787 doc.packages.append(sbom_data.Package(id=sbom_data.SPDXID_PLATFORM,
788 name=sbom_data.PACKAGE_NAME_PLATFORM,
789 download_location=sbom_data.VALUE_NONE,
790 version=args.build_version,
791 supplier='Organization: ' + args.product_mfr,
792 declared_license_ids=[sbom_data.SPDXID_LICENSE_APACHE]))
793
794 # Report on some issues and information
795 report = {
796 ISSUE_NO_METADATA: [],
797 ISSUE_NO_METADATA_FILE: [],
798 ISSUE_METADATA_FILE_INCOMPLETE: [],
799 ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
800 ISSUE_INSTALLED_FILE_NOT_EXIST: [],
801 ISSUE_NO_MODULE_FOUND_FOR_STATIC_DEP: [],
802 INFO_METADATA_FOUND_FOR_PACKAGE: [],
803 }
804
805 # Get installed files and corresponding make modules' metadata if an installed file is from a make module.
806 installed_files_metadata = db.get_installed_files()
807
808 # Find which Soong module an installed file is from and merge metadata from Make and Soong
809 for installed_file_metadata in installed_files_metadata:
810 soong_module = db.get_soong_module_of_installed_file(installed_file_metadata['installed_file'])
811 if soong_module:
812 # Merge soong metadata to make metadata
813 installed_file_metadata.update(soong_module)
814 else:
815 # For make modules soong_module_type should be empty
816 installed_file_metadata['soong_module_type'] = ''
817 installed_file_metadata['static_dep_files'] = ''
818 installed_file_metadata['whole_static_dep_files'] = ''
819
820 # Scan the metadata and create the corresponding package and file records in SPDX
821 for installed_file_metadata in installed_files_metadata:
822 installed_file = installed_file_metadata['installed_file']
823 module_path = installed_file_metadata['module_path']
824 product_copy_files = installed_file_metadata['product_copy_files']
825 kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']
826 build_output_path = installed_file
827 installed_file = installed_file.removeprefix(args.product_out)
828
829 if not installed_file_has_metadata(installed_file_metadata, report):
830 continue
831 if not (os.path.islink(build_output_path) or os.path.isfile(build_output_path)):
832 report[ISSUE_INSTALLED_FILE_NOT_EXIST].append(installed_file)
833 continue
834
835 file_id = new_file_id(installed_file)
836 sha1 = checksum(build_output_path)
837 f = sbom_data.File(id=file_id, name=installed_file, checksum=sha1)
838 doc.files.append(f)
839 product_package.file_ids.append(file_id)
840
841 if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
842 add_package_of_file(file_id, installed_file_metadata, doc, report)
843
844 elif module_path or installed_file_metadata['is_platform_generated']:
845 # File from PLATFORM package
846 doc.add_relationship(sbom_data.Relationship(id1=file_id,
847 relationship=sbom_data.RelationshipType.GENERATED_FROM,
848 id2=sbom_data.SPDXID_PLATFORM))
849 if installed_file_metadata['is_platform_generated']:
850 f.concluded_license_ids = [sbom_data.SPDXID_LICENSE_APACHE]
851
852 elif product_copy_files:
853 # Format of product_copy_files: <source path>:<dest path>
854 src_path = product_copy_files.split(':')[0]
855 # So far product_copy_files are copied from directory system, kernel, hardware, frameworks and device,
856 # so process them as files from PLATFORM package
857 doc.add_relationship(sbom_data.Relationship(id1=file_id,
858 relationship=sbom_data.RelationshipType.GENERATED_FROM,
859 id2=sbom_data.SPDXID_PLATFORM))
860 if installed_file_metadata['license_text']:
861 if installed_file_metadata['license_text'] == 'build/soong/licenses/LICENSE':
862 f.concluded_license_ids = [sbom_data.SPDXID_LICENSE_APACHE]
863
864 elif installed_file.endswith('.fsv_meta'):
865 doc.add_relationship(sbom_data.Relationship(id1=file_id,
866 relationship=sbom_data.RelationshipType.GENERATED_FROM,
867 id2=sbom_data.SPDXID_PLATFORM))
868 f.concluded_license_ids = [sbom_data.SPDXID_LICENSE_APACHE]
869
870 elif kernel_module_copy_files.startswith('ANDROID-GEN'):
871 # For the four files generated for _dlkm, _ramdisk partitions
872 doc.add_relationship(sbom_data.Relationship(id1=file_id,
873 relationship=sbom_data.RelationshipType.GENERATED_FROM,
874 id2=sbom_data.SPDXID_PLATFORM))
875
876 # Process static dependencies of the installed file
877 add_static_deps_of_file(file_id, installed_file_metadata, doc)
878
879 # Add licenses of the installed file
880 add_licenses_of_file(file_id, installed_file_metadata, doc)
881
882 # Add all static library files to SBOM
883 for dep_file in get_all_transitive_static_dep_files_of_installed_files(installed_files_metadata, db, report):
884 filepath = dep_file.removeprefix(args.soong_out + '/.intermediates/')
885 file_id = new_file_id(filepath)
886 # SHA1 of empty string. Sometimes .a files might not be built.
887 sha1 = 'SHA1: da39a3ee5e6b4b0d3255bfef95601890afd80709'
888 if os.path.islink(dep_file) or os.path.isfile(dep_file):
889 sha1 = checksum(dep_file)
890 doc.files.append(sbom_data.File(id=file_id,
891 name=filepath,
892 checksum=sha1))
893 file_metadata = {
894 'installed_file': dep_file,
895 'is_prebuilt_make_module': False
896 }
897 file_metadata.update(db.get_soong_module_of_built_file(dep_file))
898 add_package_of_file(file_id, file_metadata, doc, report)
899
900 # Add relationships for static deps of static libraries
901 add_static_deps_of_file(file_id, file_metadata, doc)
902
903 # Add licenses of the static lib
904 add_licenses_of_file(file_id, file_metadata, doc)
905
906 # Save SBOM records to output file
907 doc.generate_packages_verification_code()
908 doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
909 prefix = args.output_file
910 if prefix.endswith('.spdx'):
911 prefix = prefix.removesuffix('.spdx')
912 elif prefix.endswith('.spdx.json'):
913 prefix = prefix.removesuffix('.spdx.json')
914
915 output_file = prefix + '.spdx'
916 with open(output_file, 'w', encoding="utf-8") as file:
917 sbom_writers.TagValueWriter.write(doc, file)
918 if args.json:
919 with open(prefix + '.spdx.json', 'w', encoding="utf-8") as file:
920 sbom_writers.JSONWriter.write(doc, file)
921
922 save_report(prefix + '-gen-report.txt', report)
923
924
# Script entry point: invoke main() only when this file is executed directly,
# so that merely importing the module does not trigger a run.
if __name__ == '__main__':
    main()