Support third_party.identifier in METADATA files of external packages.
Bug: 303688820
Test: CIs
Test: "m sbom" after lunch
Change-Id: Ic329d87cdcfbe4152b0fe6a8fd71c4867593b674
diff --git a/tools/protos/metadata_file.proto b/tools/protos/metadata_file.proto
index ac1129a..47562c5 100644
--- a/tools/protos/metadata_file.proto
+++ b/tools/protos/metadata_file.proto
@@ -92,6 +92,8 @@
SBOMRef sbom_ref = 10;
}
+ // Identifiers for the package.
+ repeated Identifier identifier = 11;
}
// URL associated with a third-party package.
@@ -278,4 +280,136 @@
// https://spdx.github.io/spdx-spec/v2.3/package-information/#72-package-spdx-identifier-field or
// https://spdx.github.io/spdx-spec/v2.3/file-information/#82-file-spdx-identifier-field
optional string element_id = 3;
+}
+
+// Identifier for a third-party package.
+// See go/tp-metadata-id.
+message Identifier {
+ // The type of the identifier. Either an "ecosystem" value from
+ // https://ossf.github.io/osv-schema/#affectedpackage-field such as "Go",
+ // "npm" or "PyPI". The "value" and "version" fields follow the same rules as
+ // defined in the OSV spec.
+ //
+ // Or one of:
+ // - "Git": The "value" field is the URL of the upstream git repository this
+ //   package is retrieved from.
+ //   For example:
+ //   - https://github.com/git/git
+ //   - git://git.kernel.org/pub/scm/git/git
+ //
+ //   Use of a git URL requires that the package "version" value must
+ //   specify a specific git tag or revision. This must not be a branch name.
+ //
+ // - "SVN": The "value" field is the URL of the upstream SVN repository this
+ //   package is retrieved from.
+ //   For example:
+ //   - http://llvm.org/svn/llvm-project/llvm/
+ //
+ //   Use of an SVN URL requires that the package "version" value must
+ //   specify a specific SVN tag or revision. This must not be a branch name.
+ //
+ // - "Hg": The "value" field is the URL of the upstream mercurial repository
+ //   this package is retrieved from.
+ //   For example:
+ //   - https://mercurial-scm.org/repo/evolve
+ //
+ //   Use of a mercurial URL requires that the package "version" value must
+ //   specify a specific tag or revision. This must not be a branch name.
+ //
+ // - "Darcs": The "value" field is the URL of the upstream darcs repository
+ //   this package is retrieved from.
+ //   For example:
+ //   - https://hub.darcs.net/hu.dwim/hu.dwim.util
+ //
+ //   Use of a Darcs URL requires that the package "version" value must
+ //   specify a specific tag or revision. This must not be a branch name.
+ //
+ // - "Piper": The "value" field is the URL of the upstream piper location.
+ //   This is primarily used when a package is being migrated into
+ //   third_party from elsewhere in Piper, or when a package is being newly
+ //   developed in third_party.
+ //
+ // - "VCS": This is a generic fallback for an unlisted version control
+ //   system. The "value" field is the URL of the repository for this VCS.
+ //
+ // - "Archive": The "value" field is the URL of the archive containing the
+ //   source code for the package, for example a zip or tgz file.
+ //
+ // - "PrebuiltByAlphabet": This type should be used for archives of
+ //   primarily Google-owned source code (may contain non-Google-owned
+ //   dependencies), which has been built using production Google
+ //   infrastructure, and copied into third_party.
+ //
+ // - "LocalSource": The "value" field is the URL identifying where the local
+ //   copy of the package source code can be found.
+ //   Examples:
+ //   - https://android.googlesource.com/platform/external/apache-http/
+ //
+ //   Typically, the metadata files describing a package reside in the same
+ //   directory as the source code for the package. In a few rare cases where
+ //   they are separate, the LocalSource URL identifies where to find the
+ //   source code. This only describes where to find the local copy of the
+ //   source; there should always be an additional URL describing where the
+ //   package was retrieved from.
+ //
+ // - "Other": An identifier that does not fit any other type. This may also
+ //   indicate that the source code was received via email or some other
+ //   out-of-band way. This is most commonly used with commercial software
+ //   received directly from the vendor. In the case of email, the "value"
+ //   field can be used to provide additional information about how it was
+ //   received.
+ optional string type = 1;
+
+ // A human-readable string explaining why a third-party package does not
+ // have this identifier type set.
+ // Example:
+ //   identifier {
+ //     type: "PyPI"
+ //     omission_reason: "Only on Git. Not published to PyPI."
+ //   }
+ optional string omission_reason = 2;
+
+ // The value of the package identifier as defined by the "type".
+ // Example:
+ //   identifier {
+ //     type: "PyPI"
+ //     value: "django"
+ //     version: "3.2.8"
+ //   }
+ optional string value = 3;
+
+ // The version associated with this package as defined by the "type".
+ // Example:
+ //   identifier {
+ //     type: "PyPI"
+ //     value: "django"
+ //     version: "3.2.8"
+ //   }
+ optional string version = 4;
+
+ // The closest version associated with this package as defined by the "type".
+ // This should only be set by automated infrastructure by applying automated
+ // heuristics, such as the closest git tag or package version from a package
+ // manifest file (e.g. pom.xml).
+ //
+ // For most identifier types, only one of `version` or `closest_version`
+ // should be set (not both). The exception is source repository types such as
+ // "Git", where `version` will refer to a git commit, and `closest_version`
+ // refers to a git tag.
+ // Example:
+ //   identifier {
+ //     type: "Git"
+ //     value: "https://github.com/my/repo"
+ //     version: "e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e"
+ //     closest_version: "v1.4"
+ //   }
+ optional string closest_version = 5;
+
+ // When `true`, this Identifier represents the location from which the source
+ // code for this package was originally obtained. This should only be set for
+ // *one* Identifier in a third_party package's METADATA.
+ //
+ // For external packages, this is typically for the Identifier associated
+ // with the version control system or package manager that was used to
+ // check out or download the code.
+ optional bool primary_source = 6;
}
\ No newline at end of file
diff --git a/tools/sbom/generate-sbom.py b/tools/sbom/generate-sbom.py
index b19be87..0a8f10a 100755
--- a/tools/sbom/generate-sbom.py
+++ b/tools/sbom/generate-sbom.py
@@ -82,6 +82,46 @@
'vndk_prebuilt_shared',
]
+THIRD_PARTY_IDENTIFIER_TYPES = [
+ # Types defined in metadata_file.proto
+ 'Git',
+ 'SVN',
+ 'Hg',
+ 'Darcs',
+ 'VCS',
+ 'Archive',
+ 'PrebuiltByAlphabet',
+ 'LocalSource',
+ 'Other',
+ # OSV ecosystems defined at https://ossf.github.io/osv-schema/#affectedpackage-field.
+ 'Go',
+ 'npm',
+ 'OSS-Fuzz',
+ 'PyPI',
+ 'RubyGems',
+ 'crates.io',
+ 'Hackage',
+ 'GHC',
+ 'Packagist',
+ 'Maven',
+ 'NuGet',
+ 'Linux',
+ 'Debian',
+ 'Alpine',
+ 'Hex',
+ 'Android',
+ 'GitHub Actions',
+ 'Pub',
+ 'ConanCenter',
+ 'Rocky Linux',
+ 'AlmaLinux',
+ 'Bitnami',
+ 'Photon OS',
+ 'CRAN',
+ 'Bioconductor',
+ 'SwiftURL'
+]
+
def get_args():
parser = argparse.ArgumentParser()
@@ -360,6 +400,20 @@
return True
+# Validate identifiers in a package's METADATA.
+# 1) Only known identifier type is allowed
+# 2) Only one identifier's primary_source can be true
+def validate_package_metadata(metadata_file_path, package_metadata):
+ primary_source_found = False
+ for identifier in package_metadata.third_party.identifier:
+ if identifier.type not in THIRD_PARTY_IDENTIFIER_TYPES:
+ sys.exit(f'Unknown value of third_party.identifier.type in {metadata_file_path}/METADATA: {identifier.type}.')
+ if primary_source_found and identifier.primary_source:
+ sys.exit(
+ f'Field "primary_source" is set to true in multiple third_party.identifier in {metadata_file_path}/METADATA.')
+ primary_source_found = identifier.primary_source
+
+
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
if metadata_file_path:
report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
@@ -372,6 +426,8 @@
with open(metadata_file_path + '/METADATA', 'rt') as f:
text_format.Parse(f.read(), package_metadata)
+ validate_package_metadata(metadata_file_path, package_metadata)
+
if not metadata_file_path in metadata_file_protos:
metadata_file_protos[metadata_file_path] = package_metadata
if not package_metadata.name: