Update the NOTICE.xml.gz generation logic in soong
Bug: 393602576
Test: m notice_xml_system
Change-Id: I459fe4c24ab9ac311fedce5b9ab0f2e385ea41d8
diff --git a/tools/sbom/Android.bp b/tools/sbom/Android.bp
index 4f6d3b7..7e2840f 100644
--- a/tools/sbom/Android.bp
+++ b/tools/sbom/Android.bp
@@ -129,5 +129,7 @@
},
},
libs: [
+ "compliance_metadata",
+ "metadata_file_proto_py",
],
}
diff --git a/tools/sbom/compliance_metadata.py b/tools/sbom/compliance_metadata.py
index aba61a8..3bc873c 100644
--- a/tools/sbom/compliance_metadata.py
+++ b/tools/sbom/compliance_metadata.py
@@ -132,6 +132,21 @@
installed_files_metadata.append(metadata)
return installed_files_metadata
+  def get_installed_file_in_dir(self, dir):
+    """Return metadata of every installed file located under a directory.
+
+    Args:
+      dir: Directory path, spelled the same way as the values stored in
+        make_metadata.installed_file. A trailing '/' is ignored.
+
+    Returns:
+      A list of dicts, one per matching make_metadata row, keyed by the
+      selected column names.
+    """
+    dir = dir.removesuffix('/')
+    # '%' and '_' are wildcards in LIKE. Escape them so that a directory such
+    # as 'system_ext' matches only itself and not e.g. 'systemXext'.
+    prefix = dir.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
+    cursor = self.conn.execute(
+        'select installed_file, module_path, is_prebuilt_make_module, product_copy_files, '
+        ' kernel_module_copy_files, is_platform_generated, license_text '
+        'from make_metadata '
+        "where installed_file like ? escape '\\'", (prefix + '/%',))
+    try:
+      rows = cursor.fetchall()
+    finally:
+      # Release the cursor even when fetching fails.
+      cursor.close()
+    # row.keys() is used to name the columns; this assumes the connection
+    # produces sqlite3.Row-like rows (row_factory is set outside this method).
+    return [dict(zip(row.keys(), row)) for row in rows]
+
def get_soong_modules(self):
# Get all records from table modules, which contains metadata of all soong modules
cursor = self.conn.execute('select name, package, package as module_path, module_type as soong_module_type, built_files, installed_files, static_dep_files, whole_static_dep_files from modules')
diff --git a/tools/sbom/gen_notice_xml.py b/tools/sbom/gen_notice_xml.py
index eaa6e5a..9e61338 100644
--- a/tools/sbom/gen_notice_xml.py
+++ b/tools/sbom/gen_notice_xml.py
@@ -25,6 +25,14 @@
"""
import argparse
+import compliance_metadata
+import google.protobuf.text_format as text_format
+import gzip
+import hashlib
+import metadata_file_pb2
+import os
+import queue
+import xml.sax.saxutils
FILE_HEADER = '''\
@@ -55,27 +63,162 @@
print(i)
-def new_file_name_tag(file_metadata, package_name):
+def new_file_name_tag(file_metadata, package_name, content_id):
+  """Return a <file-name> element linking an installed file to a license text.
+
+  Args:
+    file_metadata: dict with an 'installed_file' path; the args.product_out
+      prefix is stripped from it before it is written.
+    package_name: value of the lib attribute; 'Android' is used when empty.
+    content_id: contentId of the <file-content> element holding the license.
+
+  Returns:
+    The element as a string terminated by a newline.
+  """
  file_path = file_metadata['installed_file'].removeprefix(args.product_out)
  lib = 'Android'
  if package_name:
    lib = package_name
-  return f'<file-name contentId="" lib="{lib}">{file_path}</file-name>\n'
+  # Both values are free-form text (package names, file paths); escape them so
+  # that '&', '<', '>' or a double quote cannot produce malformed XML.
+  lib = xml.sax.saxutils.escape(lib, {'"': '&quot;'})
+  file_path = xml.sax.saxutils.escape(file_path)
+  return f'<file-name contentId="{content_id}" lib="{lib}">{file_path}</file-name>\n'
-def new_file_content_tag():
-  pass
+def new_file_content_tag(content_id, license_text):
+  """Return a <file-content> element holding one license text.
+
+  Args:
+    content_id: value of the contentId attribute, referenced by <file-name>.
+    license_text: raw text of the license file.
+
+  Returns:
+    The element as a string followed by an empty line.
+  """
+  # Tab, newline and carriage return are written as character references.
+  # escape() also rewrites '&', '<' and '>', so the escaped text can never
+  # contain the ']]>' sequence that would end the CDATA section early.
+  escaped_license_text = xml.sax.saxutils.escape(license_text, {'\t': '&#x9;', '\n': '&#xA;', '\r': '&#xD;'})
+  return f'<file-content contentId="{content_id}"><![CDATA[{escaped_license_text}]]></file-content>\n\n'
+def get_metadata_file_path(file_metadata):
+  """Search for METADATA file of a package and return its path.
+
+  The search starts at file_metadata['module_path'] or, when that is empty, at
+  the directory of the source half of the first 'src:dest' pair in
+  file_metadata['kernel_module_copy_files'], and walks up one directory at a
+  time.
+
+  Returns:
+    The first directory containing a METADATA file, or '' when there is no
+    starting point or no ancestor contains one.
+  """
+  metadata_path = ''
+  if file_metadata['module_path']:
+    metadata_path = file_metadata['module_path']
+  elif file_metadata['kernel_module_copy_files']:
+    metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])
+
+  while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
+    parent = os.path.dirname(metadata_path)
+    if parent == metadata_path:
+      # dirname() of a filesystem root is the root itself; stop here instead
+      # of looping forever on an absolute path without any METADATA file.
+      return ''
+    metadata_path = parent
+
+  return metadata_path
+
+def md5_file_content(filepath):
+  """Return the hexadecimal MD5 digest of the bytes stored in filepath."""
+  with open(filepath, 'rb') as content:
+    digest = hashlib.md5(content.read())
+  return digest.hexdigest()
+
+def get_transitive_static_dep_modules(installed_file_metadata, db):
+  """Collect the soong modules an installed file statically depends on.
+
+  Starting from the 'static_dep_files' and 'whole_static_dep_files' of
+  installed_file_metadata, dependency files are visited first-in first-out and
+  each is resolved with db.get_soong_module_of_built_file(). Files without a
+  soong module are skipped; the dependencies of resolved modules are visited
+  in turn.
+
+  Returns:
+    A view of the resolved soong module records, in the order they were found.
+  """
+
+  def dep_files_of(record):
+    # Both fields are space separated file lists and may be empty.
+    files = []
+    for key in ('static_dep_files', 'whole_static_dep_files'):
+      if record[key]:
+        files.extend(record[key].split(' '))
+    return files
+
+  # Work list read front to back through next_index; new entries go to the end.
+  pending = dep_files_of(installed_file_metadata)
+  resolved = {}
+  next_index = 0
+  while next_index < len(pending):
+    dep_file = pending[next_index]
+    next_index += 1
+    if dep_file in resolved:
+      # It has been processed
+      continue
+
+    module = db.get_soong_module_of_built_file(dep_file)
+    if not module:
+      continue
+
+    resolved[dep_file] = module
+    pending.extend(f for f in dep_files_of(module) if f not in resolved)
+
+  return resolved.values()
def main():
+  """Generate NOTICE.xml for one partition and its gzipped copy.
+
+  Installed files under args.product_out/args.partition are read from the
+  compliance metadata database. A <file-name> element is written for every
+  pair of installed file and license file, followed by one <file-content>
+  element per distinct license content. NOTICE.xml is written next to
+  args.output_file and then compressed into args.output_file.
+  """
  global args
  args = get_args()
  log('Args:', vars(args))
-  with open(args.output_file, 'w', encoding="utf-8") as notice_xml_file:
+  global db
+  db = compliance_metadata.MetadataDb(args.metadata)
+  # os.path.join() keeps the side files next to the output even when
+  # output_file has no directory part; plain concatenation would then point
+  # at the filesystem root.
+  output_dir = os.path.dirname(args.output_file)
+  if args.debug:
+    db.dump_debug_db(os.path.join(output_dir, 'compliance-metadata-debug.db'))
+
+  # NOTICE.xml
+  notice_xml_file_path = os.path.join(output_dir, 'NOTICE.xml')
+  with open(notice_xml_file_path, 'w', encoding="utf-8") as notice_xml_file:
    notice_xml_file.write(FILE_HEADER)
+
+    # License file path -> MD5 of its content; the MD5 is used as contentId.
+    all_license_files = {}
+    for metadata in db.get_installed_file_in_dir(args.product_out + '/' + args.partition):
+      soong_module = db.get_soong_module_of_installed_file(metadata['installed_file'])
+      if soong_module:
+        metadata.update(soong_module)
+      else:
+        # For make modules soong_module_type should be empty
+        metadata['soong_module_type'] = ''
+        metadata['static_dep_files'] = ''
+        metadata['whole_static_dep_files'] = ''
+
+      installed_file_metadata_list = [metadata]
+      if args.partition in ('vendor', 'product', 'system_ext'):
+        # For transitive static dependencies of an installed file, make it as if an installed file are
+        # also created from static dependency modules whose licenses are also collected
+        static_dep_modules = get_transitive_static_dep_modules(metadata, db)
+        for dep in static_dep_modules:
+          dep['installed_file'] = metadata['installed_file']
+          installed_file_metadata_list.append(dep)
+
+      for installed_file_metadata in installed_file_metadata_list:
+        package_name = 'Android'
+        licenses = {}
+        if installed_file_metadata['module_path']:
+          metadata_file_path = get_metadata_file_path(installed_file_metadata)
+          if metadata_file_path:
+            proto = metadata_file_pb2.Metadata()
+            # Read as UTF-8 explicitly rather than with the locale encoding.
+            with open(metadata_file_path + '/METADATA', 'rt', encoding='utf-8') as f:
+              text_format.Parse(f.read(), proto)
+            if proto.name:
+              package_name = proto.name
+              if proto.third_party and proto.third_party.version:
+                if proto.third_party.version.startswith('v'):
+                  package_name = package_name + '_' + proto.third_party.version
+                else:
+                  package_name = package_name + '_v_' + proto.third_party.version
+            else:
+              package_name = metadata_file_path
+              if metadata_file_path.startswith('external/'):
+                package_name = metadata_file_path.removeprefix('external/')
+
+          # Every license file is in a <file-content> element
+          licenses = db.get_module_licenses(installed_file_metadata.get('name', ''), installed_file_metadata['module_path'])
+
+        # NOTE(review): the two branches below read the outer `metadata`, not
+        # `installed_file_metadata`; confirm this is intended for the entries
+        # that stand for static dependencies.
+        # Installed file is from PRODUCT_COPY_FILES
+        elif metadata['product_copy_files']:
+          licenses['unused_name'] = metadata['license_text']
+
+        # Installed file is generated by the platform in builds
+        elif metadata['is_platform_generated']:
+          licenses['unused_name'] = metadata['license_text']
+
+        if licenses:
+          # Each value is a space separated filepath list
+          for license_files in licenses.values():
+            if not license_files:
+              continue
+            for filepath in license_files.split(' '):
+              if filepath not in all_license_files:
+                all_license_files[filepath] = md5_file_content(filepath)
+              md5 = all_license_files[filepath]
+              notice_xml_file.write(new_file_name_tag(installed_file_metadata, package_name, md5))
+
+    # Licenses
+    # Files with identical content share one MD5 and are written only once.
+    processed_md5 = set()
+    for filepath, md5 in all_license_files.items():
+      if md5 in processed_md5:
+        continue
+      processed_md5.add(md5)
+      # Decode as UTF-8 to match the encoding of NOTICE.xml; undecodable
+      # bytes are kept as backslash escapes instead of failing the build.
+      with open(filepath, 'rt', encoding='utf-8', errors='backslashreplace') as f:
+        notice_xml_file.write(new_file_content_tag(md5, f.read()))
+
    notice_xml_file.write(FILE_FOOTER)
+
+  # NOTICE.xml.gz
+  with open(notice_xml_file_path, 'rb') as notice_xml_file, gzip.open(args.output_file, 'wb') as gz_file:
+    gz_file.writelines(notice_xml_file)
if __name__ == '__main__':
main()