blob: 56509c9a11e587177d6d985ced294dcb425c48fd [file] [log] [blame]
Wei Lidec97b12023-04-07 16:45:17 -07001#!/usr/bin/env python3
2#
3# Copyright (C) 2023 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Generate the SBOM of the current target product in SPDX format.
19Usage example:
20 generate-sbom.py --output_file out/target/product/vsoc_x86_64/sbom.spdx \
21 --metadata out/target/product/vsoc_x86_64/sbom-metadata.csv \
22 --product_out_dir=out/target/product/vsoc_x86_64 \
23 --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \
24 --product_mfr=Google
25"""
26
27import argparse
28import csv
29import datetime
30import google.protobuf.text_format as text_format
31import hashlib
32import os
33import metadata_file_pb2
34import sbom_data
35import sbom_writers
36
37
# Package types. Each value is used as the <type> component of a package SPDX ID
# (see new_package_id) and, for PKG_UPSTREAM, of an external document reference ID.
PKG_SOURCE = 'SOURCE'
PKG_UPSTREAM = 'UPSTREAM'
PKG_PREBUILT = 'PREBUILT'

# Prefix of the security tags in a METADATA file that carry an NVD CPE identifier.
NVD_CPE23 = 'NVD-CPE2.3:'

# Section headings of the generation report. Each one is a key of the report dict built in
# main() and is written as a heading line by save_report().
ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
ISSUE_UNKNOWN_SECURITY_TAG_TYPE = 'Unknown security tag type:'
ISSUE_INSTALLED_FILE_NOT_EXIST = 'Non-exist installed files:'
INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
53
# Soong module types whose installed files are treated as coming from a prebuilt package
# (see is_soong_prebuilt_module).
SOONG_PREBUILT_MODULE_TYPES = [
    'android_app_import',
    'android_library_import',
    'cc_prebuilt_binary',
    'cc_prebuilt_library',
    'cc_prebuilt_library_headers',
    'cc_prebuilt_library_shared',
    'cc_prebuilt_library_static',
    'cc_prebuilt_object',
    'dex_import',
    'java_import',
    'java_sdk_library_import',
    'java_system_modules_import',
    'libclang_rt_prebuilt_library_static',
    'libclang_rt_prebuilt_library_shared',
    'llvm_prebuilt_library_static',
    'ndk_prebuilt_object',
    'ndk_prebuilt_shared_stl',
    # Was misspelled 'nkd_prebuilt_static_stl', which never matched the real module type.
    'ndk_prebuilt_static_stl',
    'prebuilt_apex',
    'prebuilt_bootclasspath_fragment',
    'prebuilt_dsp',
    'prebuilt_firmware',
    'prebuilt_kernel_modules',
    'prebuilt_rfsa',
    'prebuilt_root',
    'rust_prebuilt_dylib',
    'rust_prebuilt_library',
    'rust_prebuilt_rlib',
    'vndk_prebuilt_shared',
]
85
Wei Lidec97b12023-04-07 16:45:17 -070086
def get_args():
    """Build the command-line parser and return the parsed arguments.

    Returns:
        The argparse namespace with verbose, output_file, metadata, product_out_dir,
        build_version, product_mfr, json and unbundled.
    """
    # Options that take a value and must always be given.
    required_options = (
        ('--output_file', 'The generated SBOM file in SPDX format.'),
        ('--metadata', 'The SBOM metadata file path.'),
        ('--product_out_dir', 'The parent directory of all the installed files.'),
        ('--build_version', 'The build version.'),
        ('--product_mfr', 'The product manufacturer.'),
    )
    # Optional on/off switches, off by default.
    boolean_switches = (
        ('--json', 'Generated SBOM file in SPDX JSON format'),
        ('--unbundled', 'Generate SBOM file for unbundled module'),
    )

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print more information.')
    for flag, description in required_options:
        parser.add_argument(flag, required=True, help=description)
    for flag, description in boolean_switches:
        parser.add_argument(flag, action='store_true', default=False, help=description)

    return parser.parse_args()
99
100
def log(*info):
    """Print every positional argument on its own line, but only in verbose mode.

    Reads the module-level ``args`` namespace that main() assigns.
    """
    if not args.verbose:
        return
    for message in info:
        print(message)
105
106
def encode_for_spdxid(s):
    """Simple encode for string values used in SPDXID, which uses the charset of A-Za-z0-9.-

    ASCII letters, ASCII digits, '.' and '-' are kept unchanged; '_', '@' and '/' are replaced
    by '-'; every other character is replaced by '0x' followed by the hex of its UTF-8 encoding.
    Leading '-' characters are stripped from the result.

    Args:
        s: The string to encode.

    Returns:
        The encoded string.
    """
    encoded = []
    for c in s:
        # str.isalnum() alone is also true for non-ASCII letters and digits, which are outside
        # the SPDXID charset, so require ASCII explicitly.
        if (c.isascii() and c.isalnum()) or c in '.-':
            encoded.append(c)
        elif c in '_@/':
            encoded.append('-')
        else:
            encoded.append('0x' + c.encode('utf-8').hex())

    return ''.join(encoded).lstrip('-')
119
120
def new_package_id(package_name, type):
    """Return the SPDX ID of a package: 'SPDXRef-<type>-<encoded package name>'."""
    encoded_name = encode_for_spdxid(package_name)
    return '-'.join(('SPDXRef', f'{type}', encoded_name))
123
124
def new_file_id(file_path):
    """Return the SPDX ID of a file: 'SPDXRef-<encoded file path>'."""
    return 'SPDXRef-' + encode_for_spdxid(file_path)
127
128
def checksum(file_path):
    """Return the SHA1 checksum of an installed file, formatted as 'SHA1: <hex digest>'.

    For a symbolic link the link target string is hashed instead of the file content.

    Args:
        file_path: Path of the installed file, appended to args.product_out_dir after a '/'.

    Returns:
        The string 'SHA1: ' followed by the hex digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Plain concatenation on purpose: os.path.join would drop product_out_dir if file_path
    # started with '/'.
    full_path = args.product_out_dir + '/' + file_path
    h = hashlib.sha1()
    if os.path.islink(full_path):
        h.update(os.readlink(full_path).encode('utf-8'))
    else:
        with open(full_path, 'rb') as f:
            # Hash in fixed-size chunks so large installed files are never held in memory whole.
            while chunk := f.read(1024 * 1024):
                h.update(chunk)
    return f'SHA1: {h.hexdigest()}'
138
139
def is_soong_prebuilt_module(file_metadata):
    """Tell whether the file's Soong module type is one of SOONG_PREBUILT_MODULE_TYPES.

    When 'soong_module_type' is empty, that falsy value itself is returned.
    """
    module_type = file_metadata['soong_module_type']
    if not module_type:
        return module_type
    return module_type in SOONG_PREBUILT_MODULE_TYPES
Wei Lidec97b12023-04-07 16:45:17 -0700143
144
def is_source_package(file_metadata):
    """Tell whether the file comes from a source package: under external/ and not a prebuilt."""
    if not file_metadata['module_path'].startswith('external/'):
        return False
    return not is_prebuilt_package(file_metadata)
148
149
def is_prebuilt_package(file_metadata):
    """Tell whether the file comes from a prebuilt package.

    With a module path, the file is prebuilt when the path is under prebuilts/, when the Soong
    module type is a prebuilt one, or when 'is_prebuilt_make_module' is set. Without a module
    path, it is prebuilt when 'kernel_module_copy_files' is set and does not start with
    'ANDROID-GEN:'.
    """
    module_path = file_metadata['module_path']
    if not module_path:
        copied_from = file_metadata['kernel_module_copy_files']
        return bool(copied_from) and not copied_from.startswith('ANDROID-GEN:')

    if module_path.startswith('prebuilts/'):
        return True
    soong_prebuilt = is_soong_prebuilt_module(file_metadata)
    if soong_prebuilt:
        return soong_prebuilt
    return file_metadata['is_prebuilt_make_module']
162
163
def get_source_package_info(file_metadata, metadata_file_path):
    """Return source package info that exists in its METADATA file, currently the name and the
    security tags converted to external references.

    See go/android-spdx and go/android-sbom-gen for more details.

    Args:
        file_metadata: Metadata row of the installed file; 'module_path' is used as the package
            name when there is no METADATA file.
        metadata_file_path: Directory of the package's METADATA file, or a falsy value.

    Returns:
        A (name, external_refs) tuple. external_refs holds one sbom_data.PackageExternalRef per
        security tag that starts with 'NVD-CPE2.3:cpe:2.3:' (cpe23Type) or 'NVD-CPE2.3:cpe:/'
        (cpe22Type), compared case-insensitively.
    """
    if not metadata_file_path:
        return file_metadata['module_path'], []

    metadata_proto = metadata_file_protos[metadata_file_path]
    cpe_ref_types = (
        ((NVD_CPE23 + 'cpe:2.3:').lower(), sbom_data.PackageExternalRefType.cpe23Type),
        ((NVD_CPE23 + 'cpe:/').lower(), sbom_data.PackageExternalRefType.cpe22Type),
    )
    external_refs = []
    for tag in metadata_proto.third_party.security.tag:
        lowered_tag = tag.lower()
        for prefix, ref_type in cpe_ref_types:
            if lowered_tag.startswith(prefix):
                # The match above ignores case, so strip the prefix by length. A case-sensitive
                # removeprefix() would leave a differently cased prefix inside the locator.
                external_refs.append(
                    sbom_data.PackageExternalRef(category=sbom_data.PackageExternalRefCategory.SECURITY,
                                                 type=ref_type,
                                                 locator=tag[len(NVD_CPE23):]))
                break

    if metadata_proto.name:
        return metadata_proto.name, external_refs
    # No name in the METADATA file: use the directory name only as package name.
    return os.path.basename(metadata_file_path), external_refs
191
192
def get_prebuilt_package_name(file_metadata, metadata_file_path):
    """Return the name of a prebuilt package.

    The name is taken, in this order, from the METADATA file's name, the METADATA file path, the
    module path, or the source directory of the kernel module when the installed file is a
    kernel module. A leading 'prebuilts/' is dropped and every '/' is replaced by '-'.

    See go/android-spdx and go/android-sbom-gen for more details.
    """
    if metadata_file_path:
        declared_name = metadata_file_protos[metadata_file_path].name
        raw_name = declared_name if declared_name else metadata_file_path
    elif file_metadata['module_path']:
        raw_name = file_metadata['module_path']
    elif file_metadata['kernel_module_copy_files']:
        # Only the source half of the copy pair (the text before the first ':') is used.
        source_path = file_metadata['kernel_module_copy_files'].partition(':')[0]
        raw_name = os.path.dirname(source_path)
    else:
        raw_name = None

    return raw_name.removeprefix('prebuilts/').replace('/', '-')
213
214
def get_metadata_file_path(file_metadata):
    """Search for the METADATA file of a package and return the directory that contains it.

    The search starts at the module path, or at the source directory of the kernel module when
    there is no module path, and walks up through the parent directories.

    Args:
        file_metadata: Metadata row of the installed file; 'module_path' and
            'kernel_module_copy_files' are read.

    Returns:
        The first directory on the way up that contains a METADATA file, or '' if none does.
    """
    metadata_path = ''
    if file_metadata['module_path']:
        metadata_path = file_metadata['module_path']
    elif file_metadata['kernel_module_copy_files']:
        metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])

    while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
        parent_path = os.path.dirname(metadata_path)
        if parent_path == metadata_path:
            # dirname() of a filesystem root is the root itself; stop here instead of looping
            # forever on an absolute path with no METADATA file above it.
            return ''
        metadata_path = parent_path

    return metadata_path
227
228
def get_package_version(metadata_file_path):
    """Return the third_party.version of a package's METADATA file, or None without a file."""
    if metadata_file_path:
        return metadata_file_protos[metadata_file_path].third_party.version
    return None
235
236
def get_package_homepage(metadata_file_path):
    """Return the homepage URL recorded in a package's METADATA file.

    third_party.homepage is preferred; otherwise the first third_party.url entry of type
    HOMEPAGE is used. Returns None when there is no METADATA file or no homepage in it.
    """
    if not metadata_file_path:
        return None

    third_party = metadata_file_protos[metadata_file_path].third_party
    if third_party.homepage:
        return third_party.homepage
    return next(
        (url.value for url in third_party.url if url.type == metadata_file_pb2.URL.Type.HOMEPAGE),
        None)
249
250
def get_package_download_location(metadata_file_path):
    """Return the code repository URL recorded in a package's METADATA file.

    The third_party.url entries are sorted by type and the first one that is not of type
    HOMEPAGE among the first two is returned. Returns None when there is no METADATA file, no
    URL, or only a single HOMEPAGE URL.
    """
    if not metadata_file_path:
        return None

    declared_urls = metadata_file_protos[metadata_file_path].third_party.url
    if not declared_urls:
        return None

    first, *others = sorted(declared_urls, key=lambda url: url.type)
    if first.type != metadata_file_pb2.URL.Type.HOMEPAGE:
        return first.value
    if others:
        return others[0].value
    return None
264
265
def get_sbom_fragments(installed_file_metadata, metadata_file_path):
    """Return the SPDX fragment of the source/prebuilt package an installed file comes from.

    A source package yields a SOURCE package, an UPSTREAM package and a VARIANT_OF relationship
    between them. A prebuilt package yields a PREBUILT package and, when its METADATA file
    defines a complete sbom_ref, an external SBOM document reference plus a VARIANT_OF
    relationship to the referenced element. Any other file yields nothing.

    See go/android-spdx and go/android-sbom-gen for more details.

    Returns:
        A tuple (external_doc_ref, packages, relationships); external_doc_ref may be None and
        the two lists may be empty.
    """
    # Info from METADATA file
    homepage = get_package_homepage(metadata_file_path)
    version = get_package_version(metadata_file_path)
    download_location = get_package_download_location(metadata_file_path)

    if is_source_package(installed_file_metadata):
        # Source fork packages
        name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
        source_id = new_package_id(name, PKG_SOURCE)
        upstream_id = new_package_id(name, PKG_UPSTREAM)
        source_pkg = sbom_data.Package(id=source_id,
                                       name=name,
                                       version=args.build_version,
                                       download_location=sbom_data.VALUE_NONE,
                                       supplier='Organization: ' + args.product_mfr,
                                       external_refs=external_refs)
        if homepage:
            upstream_supplier = 'Organization: ' + homepage
        else:
            upstream_supplier = sbom_data.VALUE_NOASSERTION
        upstream_pkg = sbom_data.Package(id=upstream_id,
                                         name=name,
                                         version=version,
                                         supplier=upstream_supplier,
                                         download_location=download_location)
        variant_of = sbom_data.Relationship(id1=source_id,
                                            relationship=sbom_data.RelationshipType.VARIANT_OF,
                                            id2=upstream_id)
        return None, [source_pkg, upstream_pkg], [variant_of]

    if not is_prebuilt_package(installed_file_metadata):
        return None, [], []

    # Prebuilt fork packages
    name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
    prebuilt_id = new_package_id(name, PKG_PREBUILT)
    fork_packages = [sbom_data.Package(id=prebuilt_id,
                                       name=name,
                                       download_location=sbom_data.VALUE_NONE,
                                       version=args.build_version,
                                       supplier='Organization: ' + args.product_mfr)]
    if not metadata_file_path:
        return None, fork_packages, []

    third_party = metadata_file_protos[metadata_file_path].third_party
    if third_party.WhichOneof('sbom') != 'sbom_ref':
        return None, fork_packages, []

    sbom_url = third_party.sbom_ref.url
    sbom_checksum = third_party.sbom_ref.checksum
    upstream_element_id = third_party.sbom_ref.element_id
    # The external reference is emitted only when all three parts of sbom_ref are present.
    if not (sbom_url and sbom_checksum and upstream_element_id):
        return None, fork_packages, []

    doc_ref_id = f'DocumentRef-{PKG_UPSTREAM}-{encode_for_spdxid(name)}'
    doc_ref = sbom_data.DocumentExternalReference(id=doc_ref_id,
                                                  uri=sbom_url,
                                                  checksum=sbom_checksum)
    variant_of = sbom_data.Relationship(id1=prebuilt_id,
                                        relationship=sbom_data.RelationshipType.VARIANT_OF,
                                        id2=doc_ref_id + ':' + upstream_element_id)
    return doc_ref, fork_packages, [variant_of]
327
328
def generate_package_verification_code(files):
    """Return the SPDX package verification code of the given files.

    The code is the SHA1 of the concatenation of the files' SHA1 hex digests sorted in
    ascending order.

    Args:
        files: Iterable of objects with a ``checksum`` attribute, either a bare hex digest or a
            digest prefixed with its algorithm, such as 'SHA1: <hex>'.

    Returns:
        The verification code as a hex string.
    """
    # The code is defined over the bare hex digests, so drop any '<algorithm>:' prefix before
    # sorting and hashing.
    digests = sorted(file.checksum.rpartition(':')[2].strip() for file in files)
    return hashlib.sha1(''.join(digests).encode('utf-8')).hexdigest()
335
336
def save_report(report):
    """Write the generation report next to the output file.

    The report file is named after args.output_file with its extension replaced by
    '-gen-report.txt'. Each section is its heading line, its entries indented by a tab, then
    an empty line.

    Args:
        report: Dict mapping a section heading to the list of entry strings of that section.
    """
    report_path = os.path.splitext(args.output_file)[0] + '-gen-report.txt'
    with open(report_path, 'w', encoding='utf-8') as report_file:
        for heading, entries in report.items():
            report_file.write(heading + '\n')
            report_file.writelines('\t' + entry + '\n' for entry in entries)
            report_file.write('\n')
345
346
def installed_file_has_metadata(installed_file_metadata, report):
    """Validate the metadata generated by Make for an installed file.

    A file counts as having metadata when it has a module path, product copy files, kernel
    module copy files, is platform generated, or ends with '.fsv_meta'. Otherwise it is added
    to the ISSUE_NO_METADATA section of the report.

    Returns:
        True if the file has metadata, False otherwise.
    """
    installed_file = installed_file_metadata['installed_file']
    metadata_sources = (
        installed_file_metadata['module_path'],
        installed_file_metadata['product_copy_files'],
        installed_file_metadata['kernel_module_copy_files'],
        installed_file_metadata['is_platform_generated'],
    )

    if any(metadata_sources) or installed_file.endswith('.fsv_meta'):
        return True

    report[ISSUE_NO_METADATA].append(installed_file)
    return False
364
365
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
    """Record in the report whether a METADATA file was found, and cache its parsed content.

    With a METADATA file, an entry is added to the INFO_METADATA_FOUND_FOR_PACKAGE section.
    The first time a given METADATA file is seen it is parsed, stored in metadata_file_protos
    and checked for a missing name, a missing third_party.version and unknown security tag
    types. Without a METADATA file, an entry is added to the ISSUE_NO_METADATA_FILE section.

    Args:
        metadata_file_path: Directory that contains the METADATA file, or a falsy value.
        installed_file_metadata: Metadata row of the installed file.
        report: Dict of report sections, updated in place.
    """
    if not metadata_file_path:
        report[ISSUE_NO_METADATA_FILE].append(
            "installed_file: {}, module_path: {}".format(
                installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
        return

    report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
        'installed_file: {}, module_path: {}, METADATA file: {}'.format(
            installed_file_metadata['installed_file'],
            installed_file_metadata['module_path'],
            metadata_file_path + '/METADATA'))

    # Many installed files share one package: parse each METADATA file only once instead of
    # re-reading it for every installed file.
    if metadata_file_path in metadata_file_protos:
        return

    package_metadata = metadata_file_pb2.Metadata()
    # Explicit encoding so parsing does not depend on the locale of the build machine.
    with open(metadata_file_path + '/METADATA', 'rt', encoding='utf-8') as f:
        text_format.Parse(f.read(), package_metadata)
    metadata_file_protos[metadata_file_path] = package_metadata

    if not package_metadata.name:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(f'{metadata_file_path}/METADATA does not has "name"')

    if not package_metadata.third_party.version:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(
            f'{metadata_file_path}/METADATA does not has "third_party.version"')

    for tag in package_metadata.third_party.security.tag:
        if not tag.startswith(NVD_CPE23):
            report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
                f'Unknown security tag type: {tag} in {metadata_file_path}/METADATA')
395
396
def generate_sbom_for_unbundled():
    """Generate the SBOM of a single unbundled module.

    The metadata CSV is scanned for the installed file whose path, prefixed with
    args.product_out_dir and suffixed with '.spdx.json', equals args.output_file. Only that
    file and a PREBUILT package named after its module path are added to the document. The
    document is written as SPDX JSON to args.output_file and as a tag-value fragment to
    '<output file without .spdx.json>-fragment.spdx'.
    """
    with open(args.metadata, newline='') as metadata_csv:
        rows = csv.DictReader(metadata_csv)
        doc = sbom_data.Document(name=args.build_version,
                                 namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                                 creators=['Organization: ' + args.product_mfr])
        for row in rows:
            installed_file = row['installed_file']
            expected_output = args.product_out_dir + installed_file + '.spdx.json'
            if args.output_file == expected_output:
                module_path = row['module_path']
                package_id = new_package_id(module_path, PKG_PREBUILT)
                package = sbom_data.Package(id=package_id,
                                            name=module_path,
                                            version=args.build_version,
                                            supplier='Organization: ' + args.product_mfr)
                file_id = new_file_id(installed_file)
                installed = sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file))
                generated_from = sbom_data.Relationship(id1=file_id,
                                                        relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                        id2=package_id)
                doc.add_package(package)
                doc.files.append(installed)
                doc.describes = file_id
                doc.add_relationship(generated_from)
                doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
                # Only the first matching row is used.
                break

    with open(args.output_file, 'w', encoding='utf-8') as json_out:
        sbom_writers.JSONWriter.write(doc, json_out)
    fragment_path = args.output_file.removesuffix('.spdx.json') + '-fragment.spdx'
    with open(fragment_path, 'w', encoding='utf-8') as fragment_out:
        sbom_writers.TagValueWriter.write(doc, fragment_out, fragment=True)
431
432
def main():
    """Generate the SBOM of the product described by the metadata CSV file.

    Parses the command line, then either delegates to generate_sbom_for_unbundled() (with
    --unbundled) or builds one SPDX document with a PRODUCT package, a PLATFORM package and a
    file record for every installed file that has metadata and exists in the product out
    directory. Each file is related to the source/prebuilt package it comes from, or to the
    PLATFORM package. The document is written in tag-value format to args.output_file, also as
    JSON to '<output_file>.json' with --json, and the collected report is saved by
    save_report().
    """
    global args, metadata_file_protos
    args = get_args()
    log('Args:', vars(args))

    if args.unbundled:
        generate_sbom_for_unbundled()
        return

    # Cache of parsed METADATA files, keyed by the directory that contains the METADATA file.
    metadata_file_protos = {}

    doc = sbom_data.Document(name=args.build_version,
                             namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                             creators=['Organization: ' + args.product_mfr])

    product_package = sbom_data.Package(id=sbom_data.SPDXID_PRODUCT,
                                        name=sbom_data.PACKAGE_NAME_PRODUCT,
                                        download_location=sbom_data.VALUE_NONE,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr,
                                        files_analyzed=True)
    doc.packages.append(product_package)

    doc.packages.append(sbom_data.Package(id=sbom_data.SPDXID_PLATFORM,
                                          name=sbom_data.PACKAGE_NAME_PLATFORM,
                                          download_location=sbom_data.VALUE_NONE,
                                          version=args.build_version,
                                          supplier='Organization: ' + args.product_mfr))

    # Report on some issues and information
    report = {
        ISSUE_NO_METADATA: [],
        ISSUE_NO_METADATA_FILE: [],
        ISSUE_METADATA_FILE_INCOMPLETE: [],
        ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
        ISSUE_INSTALLED_FILE_NOT_EXIST: [],
        INFO_METADATA_FOUND_FOR_PACKAGE: [],
    }

    # Scan the metadata in CSV file and create the corresponding package and file records in SPDX
    with open(args.metadata, newline='') as sbom_metadata_file:
        reader = csv.DictReader(sbom_metadata_file)
        for installed_file_metadata in reader:
            installed_file = installed_file_metadata['installed_file']
            module_path = installed_file_metadata['module_path']
            product_copy_files = installed_file_metadata['product_copy_files']
            kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']

            if not installed_file_has_metadata(installed_file_metadata, report):
                continue
            file_path = args.product_out_dir + '/' + installed_file
            if not (os.path.islink(file_path) or os.path.isfile(file_path)):
                report[ISSUE_INSTALLED_FILE_NOT_EXIST].append(installed_file)
                continue

            file_id = new_file_id(installed_file)
            doc.files.append(
                sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file)))
            product_package.file_ids.append(file_id)

            if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
                metadata_file_path = get_metadata_file_path(installed_file_metadata)
                report_metadata_file(metadata_file_path, installed_file_metadata, report)

                # File from source fork packages or prebuilt fork packages
                external_doc_ref, pkgs, rels = get_sbom_fragments(installed_file_metadata, metadata_file_path)
                if len(pkgs) > 0:
                    if external_doc_ref:
                        doc.add_external_ref(external_doc_ref)
                    for p in pkgs:
                        doc.add_package(p)
                    for rel in rels:
                        doc.add_relationship(rel)
                    fork_package_id = pkgs[0].id  # The first package should be the source/prebuilt fork package
                    doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                                relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                                id2=fork_package_id))
            elif (
                    # File from PLATFORM package
                    module_path
                    or installed_file_metadata['is_platform_generated']
                    # Format of product_copy_files: <source path>:<dest path>
                    # So far product_copy_files are copied from directory system, kernel, hardware,
                    # frameworks and device, so process them as files from PLATFORM package
                    or product_copy_files
                    # See build/make/core/Makefile:2988
                    or installed_file.endswith('.fsv_meta')
                    # For the four files generated for _dlkm, _ramdisk partitions
                    # See build/make/core/Makefile:323
                    or kernel_module_copy_files.startswith('ANDROID-GEN')):
                doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                            relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                            id2=sbom_data.SPDXID_PLATFORM))

    product_package.verification_code = generate_package_verification_code(doc.files)

    # Save SBOM records to output file
    doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    with open(args.output_file, 'w', encoding="utf-8") as file:
        sbom_writers.TagValueWriter.write(doc, file)
    if args.json:
        with open(args.output_file+'.json', 'w', encoding="utf-8") as file:
            sbom_writers.JSONWriter.write(doc, file)

    # The report was collected above but never written out; save it next to the output file.
    save_report(report)
545
546
# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()