blob: 192061ea4e3d538374e3fadb988d12982e40015e [file] [log] [blame]
Wei Lidec97b12023-04-07 16:45:17 -07001#!/usr/bin/env python3
2#
3# Copyright (C) 2023 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
Generate the SBOM of the current target product in SPDX format.
Usage example:
  generate-sbom.py --output_file out/target/product/vsoc_x86_64/sbom.spdx \
                   --metadata out/target/product/vsoc_x86_64/sbom-metadata.csv \
                   --product_out_dir=out/target/product/vsoc_x86_64 \
                   --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \
                   --product_mfr=Google
25"""
26
27import argparse
28import csv
29import datetime
30import google.protobuf.text_format as text_format
31import hashlib
32import os
33import metadata_file_pb2
34import sbom_data
35import sbom_writers
36
37
# Package types, used as the middle component of package SPDX IDs.
PKG_SOURCE = 'SOURCE'
PKG_UPSTREAM = 'UPSTREAM'
PKG_PREBUILT = 'PREBUILT'

# Prefix of the security tags in METADATA files that carry a CPE identifier.
NVD_CPE23 = 'NVD-CPE2.3:'

# Section headers of the generation report (keys of the report dict).
ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
ISSUE_UNKNOWN_SECURITY_TAG_TYPE = 'Unknown security tag type:'
ISSUE_INSTALLED_FILE_NOT_EXIST = 'Non-exist installed files:'
INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
53
54
def get_args():
  """Parse the command line of this script and return the resulting namespace."""
  # Options that take a value and must always be given.
  required_options = (
      ('--output_file', 'The generated SBOM file in SPDX format.'),
      ('--metadata', 'The SBOM metadata file path.'),
      ('--product_out_dir', 'The parent directory of all the installed files.'),
      ('--build_version', 'The build version.'),
      ('--product_mfr', 'The product manufacturer.'),
  )
  # Boolean switches that default to False.
  switches = (
      ('--json', 'Generated SBOM file in SPDX JSON format'),
      ('--unbundled', 'Generate SBOM file for unbundled module'),
  )

  parser = argparse.ArgumentParser()
  parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print more information.')
  for flag, description in required_options:
    parser.add_argument(flag, required=True, help=description)
  for flag, description in switches:
    parser.add_argument(flag, action='store_true', default=False, help=description)

  return parser.parse_args()
67
68
def log(*info):
  """Print every positional argument on its own line, but only in verbose mode."""
  if not args.verbose:
    return
  for item in info:
    print(item)
73
74
def encode_for_spdxid(s):
  """Encode a string so that it only uses the SPDXID charset A-Za-z0-9.-

  ASCII letters, ASCII digits, '.' and '-' are kept as they are; '_', '@' and '/' are
  replaced by '-'; every other character is replaced by '0x' followed by the hex of its
  UTF-8 encoding.

  Args:
    s: the string to encode.

  Returns:
    The encoded string with any leading '-' removed.
  """
  pieces = []
  for c in s:
    # str.isalnum() alone is also true for non-ASCII letters and digits, which are outside
    # the SPDXID charset, so restrict it to ASCII.
    if (c.isascii() and c.isalnum()) or c in '.-':
      pieces.append(c)
    elif c in '_@/':
      pieces.append('-')
    else:
      pieces.append('0x' + c.encode('utf-8').hex())

  return ''.join(pieces).lstrip('-')
87
88
def new_package_id(package_name, type):
  """Return the SPDX ID of a package: 'SPDXRef-<type>-<encoded package name>'."""
  encoded_name = encode_for_spdxid(package_name)
  return 'SPDXRef-{}-{}'.format(type, encoded_name)
91
92
def new_file_id(file_path):
  """Return the SPDX ID of a file: 'SPDXRef-<encoded file path>'."""
  return 'SPDXRef-' + encode_for_spdxid(file_path)
95
96
def checksum(file_path):
  """Return the SHA1 checksum of an installed file as 'SHA1: <hex digest>'.

  Args:
    file_path: path of the installed file; it is appended to args.product_out_dir with a
      '/' in between to locate the file on disk.

  Returns:
    'SHA1: ' followed by the hex digest. For a symbolic link the digest is computed over
    the link target string returned by os.readlink(), not over the content it points to.
  """
  file_path = args.product_out_dir + '/' + file_path
  h = hashlib.sha1()
  if os.path.islink(file_path):
    h.update(os.readlink(file_path).encode('utf-8'))
  else:
    with open(file_path, 'rb') as f:
      # Feed the hash in fixed-size chunks so that large installed files are never loaded
      # into memory as a whole; the resulting digest is the same.
      while chunk := f.read(1024 * 1024):
        h.update(chunk)
  return f'SHA1: {h.hexdigest()}'
106
107
def is_soong_prebuilt_module(file_metadata):
  """Tell whether the Soong module type of an installed file is a known prebuilt module type.

  Args:
    file_metadata: metadata of an installed file; only 'soong_module_type' is read.

  Returns:
    True if 'soong_module_type' is one of the prebuilt module types listed below, False
    otherwise (including when the field is empty).
  """
  return file_metadata['soong_module_type'] in (
      'android_app_import', 'android_library_import', 'cc_prebuilt_binary', 'cc_prebuilt_library',
      'cc_prebuilt_library_headers', 'cc_prebuilt_library_shared', 'cc_prebuilt_library_static', 'cc_prebuilt_object',
      'dex_import', 'java_import', 'java_sdk_library_import', 'java_system_modules_import',
      'libclang_rt_prebuilt_library_static', 'libclang_rt_prebuilt_library_shared', 'llvm_prebuilt_library_static',
      'ndk_prebuilt_object', 'ndk_prebuilt_shared_stl', 'ndk_prebuilt_static_stl', 'prebuilt_apex',
      'prebuilt_bootclasspath_fragment', 'prebuilt_dsp', 'prebuilt_firmware', 'prebuilt_kernel_modules',
      'prebuilt_rfsa', 'prebuilt_root', 'rust_prebuilt_dylib', 'rust_prebuilt_library', 'rust_prebuilt_rlib',
      'vndk_prebuilt_shared',

      # The module types below are listed but disabled, i.e. not treated as prebuilt:
      # 'android_test_import',
      # 'cc_prebuilt_test_library_shared',
      # 'java_import_host',
      # 'java_test_import',
      # 'llvm_host_prebuilt_library_shared',
      # 'prebuilt_apis',
      # 'prebuilt_build_tool',
      # 'prebuilt_defaults',
      # 'prebuilt_etc',
      # 'prebuilt_etc_host',
      # 'prebuilt_etc_xml',
      # 'prebuilt_font',
      # 'prebuilt_hidl_interfaces',
      # 'prebuilt_platform_compat_config',
      # 'prebuilt_stubs_sources',
      # 'prebuilt_usr_share',
      # 'prebuilt_usr_share_host',
      # 'soong_config_module_type_import',
  )
138
139
def is_source_package(file_metadata):
  """Return True for files built from a module under external/ that is not a prebuilt package."""
  if not file_metadata['module_path'].startswith('external/'):
    return False
  return not is_prebuilt_package(file_metadata)
143
144
def is_prebuilt_package(file_metadata):
  """Tell whether an installed file comes from a prebuilt package.

  With a module path, the file is prebuilt when the path is under prebuilts/, when the Soong
  module type is a prebuilt one, or when 'is_prebuilt_make_module' is set. Without a module
  path, it is prebuilt when it is a copied kernel module whose 'kernel_module_copy_files'
  value does not start with 'ANDROID-GEN:'.
  """
  if file_metadata['module_path']:
    under_prebuilts = file_metadata['module_path'].startswith('prebuilts/')
    return (under_prebuilts or
            is_soong_prebuilt_module(file_metadata) or
            file_metadata['is_prebuilt_make_module'])

  copy_spec = file_metadata['kernel_module_copy_files']
  return bool(copy_spec) and not copy_spec.startswith('ANDROID-GEN:')
157
158
def get_source_package_info(file_metadata, metadata_file_path):
  """Return the info of a source package found in its METADATA file.

  Currently this is the package name and the security tags converted to external references.

  See go/android-spdx and go/android-sbom-gen for more details.

  Args:
    file_metadata: metadata of the installed file; 'module_path' is used as the package name
      when there is no METADATA file.
    metadata_file_path: directory of the package's METADATA file (a key of
      metadata_file_protos), or a false value if there is none.

  Returns:
    A tuple (name, external_refs). The name is taken from the METADATA file, or is the
    base name of metadata_file_path when the METADATA file has no name.
  """
  if not metadata_file_path:
    return file_metadata['module_path'], []

  metadata_proto = metadata_file_protos[metadata_file_path]
  tag_prefix = NVD_CPE23.lower()
  external_refs = []
  for tag in metadata_proto.third_party.security.tag:
    if not tag.lower().startswith(tag_prefix):
      continue
    # The prefix was matched case-insensitively, so cut it off by length; a case-sensitive
    # removeprefix() would leave e.g. 'nvd-cpe2.3:' in the locator.
    locator = tag[len(NVD_CPE23):]
    cpe = locator.lower()
    if cpe.startswith('cpe:2.3:'):
      ref_type = sbom_data.PackageExternalRefType.cpe23Type
    elif cpe.startswith('cpe:/'):
      ref_type = sbom_data.PackageExternalRefType.cpe22Type
    else:
      continue
    external_refs.append(
        sbom_data.PackageExternalRef(category=sbom_data.PackageExternalRefCategory.SECURITY,
                                     type=ref_type,
                                     locator=locator))

  if metadata_proto.name:
    return metadata_proto.name, external_refs
  # No name in METADATA: use only the directory name as the package name.
  return os.path.basename(metadata_file_path), external_refs
186
187
def get_prebuilt_package_name(file_metadata, metadata_file_path):
  """Return the name of a prebuilt package.

  The name comes, in this order of preference, from the METADATA file, the METADATA file
  path, the module path, or the directory of the kernel module's source path if the installed
  file is a kernel module. A leading 'prebuilts/' is dropped and every '/' becomes '-'.

  See go/android-spdx and go/android-sbom-gen for more details.
  """
  name = None
  if metadata_file_path:
    name = metadata_file_protos[metadata_file_path].name or metadata_file_path
  elif file_metadata['module_path']:
    name = file_metadata['module_path']
  elif file_metadata['kernel_module_copy_files']:
    # Only the part before the first ':' (the source path) is used.
    src_path, _, _ = file_metadata['kernel_module_copy_files'].partition(':')
    name = os.path.dirname(src_path)

  return name.removeprefix('prebuilts/').replace('/', '-')
208
209
def get_metadata_file_path(file_metadata):
  """Search for the METADATA file of a package and return the directory containing it.

  The search starts at the module path, or at the directory of the kernel module's source
  path when there is no module path, and walks up parent directories until a METADATA file
  is found.

  Args:
    file_metadata: metadata of an installed file; 'module_path' and
      'kernel_module_copy_files' are read.

  Returns:
    The directory that contains the METADATA file, or '' if none is found.
  """
  if file_metadata['module_path']:
    candidate = file_metadata['module_path']
  elif file_metadata['kernel_module_copy_files']:
    candidate = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])
  else:
    candidate = ''

  while candidate:
    if os.path.exists(candidate + '/METADATA'):
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # os.path.dirname() of a filesystem root returns the root itself; stop here so an
      # absolute path without any METADATA file cannot loop forever.
      break
    candidate = parent

  return ''
222
223
def get_package_version(metadata_file_path):
  """Return the third_party.version of a package's METADATA file, or None without a METADATA file."""
  if metadata_file_path:
    return metadata_file_protos[metadata_file_path].third_party.version
  return None
230
231
def get_package_homepage(metadata_file_path):
  """Return the homepage URL of a package from its METADATA file.

  third_party.homepage is preferred; otherwise the value of the first third_party.url entry
  of type HOMEPAGE is used. Returns None if there is no METADATA file or no homepage in it.
  """
  if not metadata_file_path:
    return None

  third_party = metadata_file_protos[metadata_file_path].third_party
  if third_party.homepage:
    return third_party.homepage

  homepage_urls = (url.value for url in third_party.url
                   if url.type == metadata_file_pb2.URL.Type.HOMEPAGE)
  return next(homepage_urls, None)
244
245
def get_package_download_location(metadata_file_path):
  """Return the code repository URL of a package from its METADATA file.

  The third_party.url entries are ordered by type. The first entry is returned unless it is
  of type HOMEPAGE, in which case the second entry (if any) is returned. Returns None if
  there is no METADATA file or no suitable URL.
  """
  if not metadata_file_path:
    return None

  declared_urls = metadata_file_protos[metadata_file_path].third_party.url
  if not declared_urls:
    return None

  by_type = sorted(declared_urls, key=lambda url: url.type)
  first = by_type[0]
  if first.type != metadata_file_pb2.URL.Type.HOMEPAGE:
    return first.value
  # NOTE(review): the second entry is returned without checking its type, so it could be
  # another HOMEPAGE entry - confirm this is intended.
  if len(by_type) > 1:
    return by_type[1].value
  return None
259
260
def get_sbom_fragments(installed_file_metadata, metadata_file_path):
  """Build the SPDX fragment of the source/prebuilt package an installed file comes from.

  A source package yields a SOURCE package, an UPSTREAM package and a VARIANT_OF relationship
  from the former to the latter. A prebuilt package yields a PREBUILT package and, if its
  METADATA file defines an sbom_ref with url, checksum and element_id, an external SBOM
  document reference plus a VARIANT_OF relationship to the referenced element.

  See go/android-spdx and go/android-sbom-gen for more details.

  Returns:
    A tuple (external_doc_ref, packages, relationships). external_doc_ref is None unless a
    complete sbom_ref was found; both lists are empty when the file belongs to neither a
    source nor a prebuilt package.
  """
  # Info from METADATA file
  homepage = get_package_homepage(metadata_file_path)
  version = get_package_version(metadata_file_path)
  download_location = get_package_download_location(metadata_file_path)

  if is_source_package(installed_file_metadata):
    # Source fork packages
    name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
    source_id = new_package_id(name, PKG_SOURCE)
    source_package = sbom_data.Package(id=source_id,
                                       name=name,
                                       version=args.build_version,
                                       download_location=sbom_data.VALUE_NONE,
                                       supplier='Organization: ' + args.product_mfr,
                                       external_refs=external_refs)

    upstream_id = new_package_id(name, PKG_UPSTREAM)
    if homepage:
      upstream_supplier = 'Organization: ' + homepage
    else:
      upstream_supplier = sbom_data.VALUE_NOASSERTION
    upstream_package = sbom_data.Package(id=upstream_id,
                                         name=name,
                                         version=version,
                                         supplier=upstream_supplier,
                                         download_location=download_location)

    variant_of_upstream = sbom_data.Relationship(id1=source_id,
                                                 relationship=sbom_data.RelationshipType.VARIANT_OF,
                                                 id2=upstream_id)
    return None, [source_package, upstream_package], [variant_of_upstream]

  if not is_prebuilt_package(installed_file_metadata):
    return None, [], []

  # Prebuilt fork packages
  name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
  prebuilt_id = new_package_id(name, PKG_PREBUILT)
  packages = [sbom_data.Package(id=prebuilt_id,
                                name=name,
                                download_location=sbom_data.VALUE_NONE,
                                version=args.build_version,
                                supplier='Organization: ' + args.product_mfr)]
  if not metadata_file_path:
    return None, packages, []

  third_party = metadata_file_protos[metadata_file_path].third_party
  if third_party.WhichOneof('sbom') != 'sbom_ref':
    return None, packages, []

  sbom_url = third_party.sbom_ref.url
  sbom_checksum = third_party.sbom_ref.checksum
  upstream_element_id = third_party.sbom_ref.element_id
  if not (sbom_url and sbom_checksum and upstream_element_id):
    # An incomplete sbom_ref cannot be referenced.
    return None, packages, []

  doc_ref_id = f'DocumentRef-{PKG_UPSTREAM}-{encode_for_spdxid(name)}'
  external_doc_ref = sbom_data.DocumentExternalReference(id=doc_ref_id,
                                                         uri=sbom_url,
                                                         checksum=sbom_checksum)
  variant_of_external = sbom_data.Relationship(id1=prebuilt_id,
                                               relationship=sbom_data.RelationshipType.VARIANT_OF,
                                               id2=doc_ref_id + ':' + upstream_element_id)
  return external_doc_ref, packages, [variant_of_external]
322
323
def generate_package_verification_code(files):
  """Return the SPDX package verification code of a list of files.

  The code is the SHA1 of the concatenation of the files' SHA1 hex digests, sorted in
  ascending order.

  Args:
    files: objects with a 'checksum' attribute holding either 'SHA1: <hex digest>' (the
      format produced by checksum()) or a bare hex digest.

  Returns:
    The verification code as a hex string.
  """
  # The verification code is defined over the bare digest values, so drop the 'SHA1: '
  # label of each checksum before sorting and hashing.
  digests = sorted(file.checksum.removeprefix('SHA1: ') for file in files)
  return hashlib.sha1(''.join(digests).encode(encoding='utf-8')).hexdigest()
330
331
def save_report(report):
  """Write the report next to the output file, as '<output file without extension>-gen-report.txt'.

  Each key of the report is written on its own line, followed by its entries (one per line,
  indented with a tab) and an empty line.
  """
  report_path = os.path.splitext(args.output_file)[0] + '-gen-report.txt'
  with open(report_path, 'w', encoding='utf-8') as report_file:
    for category, entries in report.items():
      report_file.write(category + '\n')
      report_file.writelines('\t' + entry + '\n' for entry in entries)
      report_file.write('\n')
340
341
def installed_file_has_metadata(installed_file_metadata, report):
  """Validate the metadata generated by Make for an installed file.

  A file has metadata when any of 'module_path', 'product_copy_files',
  'kernel_module_copy_files' or 'is_platform_generated' is set, or when its name ends with
  '.fsv_meta'. Otherwise the file is added to the ISSUE_NO_METADATA section of the report.

  Returns:
    True if the installed file has metadata, False otherwise.
  """
  installed_file = installed_file_metadata['installed_file']
  origin_fields = (
      installed_file_metadata['module_path'],
      installed_file_metadata['product_copy_files'],
      installed_file_metadata['kernel_module_copy_files'],
      installed_file_metadata['is_platform_generated'],
  )

  if any(origin_fields) or installed_file.endswith('.fsv_meta'):
    return True

  report[ISSUE_NO_METADATA].append(installed_file)
  return False
359
360
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
  """Record in the report whether a METADATA file was found, and load it on first use.

  When a METADATA file exists, the installed file is listed under
  INFO_METADATA_FOUND_FOR_PACKAGE. The first time a given METADATA file is seen it is parsed,
  cached in metadata_file_protos, and checked: a missing name or third_party.version and
  security tags not starting with NVD_CPE23 are added to the report. When there is no
  METADATA file, the installed file is listed under ISSUE_NO_METADATA_FILE.

  Args:
    metadata_file_path: directory containing the METADATA file, or a false value.
    installed_file_metadata: metadata of the installed file; 'installed_file' and
      'module_path' are used in the report entries.
    report: dict mapping report section headers to lists of entries; modified in place.
  """
  if not metadata_file_path:
    report[ISSUE_NO_METADATA_FILE].append(
        "installed_file: {}, module_path: {}".format(
            installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
    return

  report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
      'installed_file: {}, module_path: {}, METADATA file: {}'.format(
          installed_file_metadata['installed_file'],
          installed_file_metadata['module_path'],
          metadata_file_path + '/METADATA'))

  # Many installed files share one METADATA file: parse and check each file only once
  # instead of re-reading it for every installed file.
  if metadata_file_path in metadata_file_protos:
    return

  package_metadata = metadata_file_pb2.Metadata()
  with open(metadata_file_path + '/METADATA', 'rt') as f:
    text_format.Parse(f.read(), package_metadata)
  metadata_file_protos[metadata_file_path] = package_metadata

  if not package_metadata.name:
    report[ISSUE_METADATA_FILE_INCOMPLETE].append(f'{metadata_file_path}/METADATA does not has "name"')

  if not package_metadata.third_party.version:
    report[ISSUE_METADATA_FILE_INCOMPLETE].append(
        f'{metadata_file_path}/METADATA does not has "third_party.version"')

  for tag in package_metadata.third_party.security.tag:
    if not tag.startswith(NVD_CPE23):
      report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
          f'Unknown security tag type: {tag} in {metadata_file_path}/METADATA')
390
391
def generate_sbom_for_unbundled():
  """Generate the SBOM fragment of a single unbundled module.

  The metadata CSV is scanned for the installed file whose
  '<product_out_dir><installed_file>.spdx' equals args.output_file. For that file a PREBUILT
  package named after its module path, a file record and a GENERATED_FROM relationship are
  added to the document, which is then written to args.output_file in tag-value format as a
  fragment. The document is written even if no row matches.
  """
  with open(args.metadata, newline='') as sbom_metadata_file:
    rows = csv.DictReader(sbom_metadata_file)
    doc = sbom_data.Document(name=args.build_version,
                             namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                             creators=['Organization: ' + args.product_mfr])
    for row in rows:
      installed_file = row['installed_file']
      if args.output_file != args.product_out_dir + installed_file + ".spdx":
        continue

      module_path = row['module_path']
      package_id = new_package_id(module_path, PKG_PREBUILT)
      file_id = new_file_id(installed_file)

      doc.add_package(sbom_data.Package(id=package_id,
                                        name=module_path,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr))
      doc.files.append(sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file)))
      doc.describes = file_id
      doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                  relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                  id2=package_id))
      doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
      # Only one installed file can match the output file.
      break

  with open(args.output_file, 'w', encoding="utf-8") as output:
    sbom_writers.TagValueWriter.write(doc, output, fragment=True)
423
424
def main():
  """Generate the SBOM of the product, or of one unbundled module when --unbundled is given.

  For the product SBOM, every row of the metadata CSV that has metadata and exists under
  args.product_out_dir becomes a file of the PRODUCT package. Each file gets a GENERATED_FROM
  relationship to its source/prebuilt fork package, or to the PLATFORM package otherwise.
  The document is written to args.output_file in SPDX tag-value format, additionally to
  '<output_file>.json' with --json, and the issues found on the way are saved with
  save_report().
  """
  global args, metadata_file_protos
  args = get_args()
  log('Args:', vars(args))

  if args.unbundled:
    generate_sbom_for_unbundled()
    return

  # Cache of parsed METADATA files, keyed by the directory containing the file.
  metadata_file_protos = {}

  doc = sbom_data.Document(name=args.build_version,
                           namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                           creators=['Organization: ' + args.product_mfr])

  product_package = sbom_data.Package(id=sbom_data.SPDXID_PRODUCT,
                                      name=sbom_data.PACKAGE_NAME_PRODUCT,
                                      download_location=sbom_data.VALUE_NONE,
                                      version=args.build_version,
                                      supplier='Organization: ' + args.product_mfr,
                                      files_analyzed=True)
  doc.packages.append(product_package)

  doc.packages.append(sbom_data.Package(id=sbom_data.SPDXID_PLATFORM,
                                        name=sbom_data.PACKAGE_NAME_PLATFORM,
                                        download_location=sbom_data.VALUE_NONE,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr))

  # Report on some issues and information
  report = {
      ISSUE_NO_METADATA: [],
      ISSUE_NO_METADATA_FILE: [],
      ISSUE_METADATA_FILE_INCOMPLETE: [],
      ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
      ISSUE_INSTALLED_FILE_NOT_EXIST: [],
      INFO_METADATA_FOUND_FOR_PACKAGE: [],
  }

  # Scan the metadata in CSV file and create the corresponding package and file records in SPDX
  with open(args.metadata, newline='') as sbom_metadata_file:
    reader = csv.DictReader(sbom_metadata_file)
    for installed_file_metadata in reader:
      installed_file = installed_file_metadata['installed_file']
      module_path = installed_file_metadata['module_path']
      product_copy_files = installed_file_metadata['product_copy_files']
      kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']

      if not installed_file_has_metadata(installed_file_metadata, report):
        continue
      file_path = args.product_out_dir + '/' + installed_file
      if not (os.path.islink(file_path) or os.path.isfile(file_path)):
        report[ISSUE_INSTALLED_FILE_NOT_EXIST].append(installed_file)
        continue

      file_id = new_file_id(installed_file)
      doc.files.append(
          sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file)))
      product_package.file_ids.append(file_id)

      if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
        metadata_file_path = get_metadata_file_path(installed_file_metadata)
        report_metadata_file(metadata_file_path, installed_file_metadata, report)

        # File from source fork packages or prebuilt fork packages
        external_doc_ref, pkgs, rels = get_sbom_fragments(installed_file_metadata, metadata_file_path)
        if pkgs:
          if external_doc_ref:
            doc.add_external_ref(external_doc_ref)
          for p in pkgs:
            doc.add_package(p)
          for rel in rels:
            doc.add_relationship(rel)
          fork_package_id = pkgs[0].id  # The first package should be the source/prebuilt fork package
          doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                      relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                      id2=fork_package_id))
      elif (module_path or
            installed_file_metadata['is_platform_generated'] or
            product_copy_files or
            installed_file.endswith('.fsv_meta') or
            kernel_module_copy_files.startswith('ANDROID-GEN')):
        # All of these are files from the PLATFORM package:
        # - files built from a module or generated by the platform;
        # - product_copy_files (format: <source path>:<dest path>), which so far are copied from
        #   directory system, kernel, hardware, frameworks and device;
        # - .fsv_meta files, see build/make/core/Makefile:2988;
        # - the four files generated for _dlkm, _ramdisk partitions, see build/make/core/Makefile:323.
        doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                    relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                    id2=sbom_data.SPDXID_PLATFORM))

  product_package.verification_code = generate_package_verification_code(doc.files)

  # Save SBOM records to output file
  doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
  with open(args.output_file, 'w', encoding="utf-8") as file:
    sbom_writers.TagValueWriter.write(doc, file)
  if args.json:
    with open(args.output_file + '.json', 'w', encoding="utf-8") as file:
      sbom_writers.JSONWriter.write(doc, file)

  # The report is only useful if it is persisted; write it next to the output file.
  save_report(report)


if __name__ == '__main__':
  main()