Parse APK manifest to extract version etc

Add libapkmanifest to encapsulate the parsing, with a small C++
library on top of libandroidfw.

Extract the zip-handling code from libapkverify into a separate
libapkzip, and fix up a bunch of tests, to keep the build happy.

We don't do anything with the manifest information except log it; more
to come in another CL.

Bug: 299591171
Test: atest libapkzip.test libapkverify.integration_test
      libapkverify.test
Test: Manual - run VM, inspect logs.
Change-Id: I56d3bb7309d43ecb598a33320705d31948710f83
diff --git a/libs/apkmanifest/Android.bp b/libs/apkmanifest/Android.bp
new file mode 100644
index 0000000..e6fcbef
--- /dev/null
+++ b/libs/apkmanifest/Android.bp
@@ -0,0 +1,46 @@
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+cc_library_shared {
+    name: "libapkmanifest_native",
+    srcs: ["native/*.cpp"],
+    shared_libs: [
+        "libandroidfw",
+        "libbase",
+        "liblog",
+        "libutils",
+    ],
+}
+
+rust_bindgen {
+    name: "libapkmanifest_bindgen",
+    defaults: ["avf_build_flags_rust"],
+    edition: "2021",
+    wrapper_src: "native/apkmanifest.hpp",
+    crate_name: "apkmanifest_bindgen",
+    source_stem: "bindings",
+    bindgen_flags: [
+        "--default-enum-style rust",
+    ],
+}
+
+rust_library {
+    name: "libapkmanifest",
+    crate_name: "apkmanifest",
+    defaults: ["avf_build_flags_rust"],
+    edition: "2021",
+    srcs: ["src/apkmanifest.rs"],
+    rustlibs: [
+        "libanyhow",
+        "libapkzip",
+        "libapkmanifest_bindgen",
+        "libscopeguard",
+    ],
+    shared_libs: ["libapkmanifest_native"],
+    multilib: {
+        lib32: {
+            enabled: false,
+        },
+    },
+}
diff --git a/libs/apkmanifest/native/apkmanifest.cpp b/libs/apkmanifest/native/apkmanifest.cpp
new file mode 100644
index 0000000..ab0ba72
--- /dev/null
+++ b/libs/apkmanifest/native/apkmanifest.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apkmanifest.hpp"
+
+#include <android-base/logging.h>
+#include <android-base/result.h>
+#include <androidfw/AssetsProvider.h>
+#include <androidfw/ResourceTypes.h>
+#include <androidfw/StringPiece.h>
+#include <androidfw/Util.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <utils/Errors.h>
+
+#include <cstdlib>
+#include <limits>
+#include <string>
+#include <string_view>
+
+using android::Asset;
+using android::AssetsProvider;
+using android::OK;
+using android::Res_value;
+using android::ResXMLParser;
+using android::ResXMLTree;
+using android::statusToString;
+using android::StringPiece16;
+using android::base::Error;
+using android::base::Result;
+using android::util::Utf16ToUtf8;
+using std::u16string_view;
+using std::unique_ptr;
+
+struct ApkManifestInfo {  // Values read from the <manifest> element.
+    std::string package;          // `package` attribute, converted to UTF-8.
+    uint32_t version_code;        // android:versionCode; low 32 bits of the version code.
+    uint32_t version_code_major;  // android:versionCodeMajor; high 32 bits of the version code.
+};
+
+namespace {
+// See https://developer.android.com/guide/topics/manifest/manifest-element
+constexpr u16string_view MANIFEST_TAG_NAME{u"manifest"};
+constexpr u16string_view ANDROID_NAMESPACE_URL{u"http://schemas.android.com/apk/res/android"};
+constexpr u16string_view PACKAGE_ATTRIBUTE_NAME{u"package"};
+constexpr u16string_view VERSION_CODE_ATTRIBUTE_NAME{u"versionCode"};
+constexpr u16string_view VERSION_CODE_MAJOR_ATTRIBUTE_NAME{u"versionCodeMajor"};
+
+// Advance the parser to the root start tag, failing unless it is an un-namespaced <manifest>.
+Result<void> findManifestElement(ResXMLTree& tree) {
+    for (;;) {
+        ResXMLParser::event_code_t event = tree.next();
+        switch (event) {
+            case ResXMLParser::END_DOCUMENT:
+            case ResXMLParser::END_TAG:
+            case ResXMLParser::TEXT:
+            default:  // Nothing else is accepted before the first start tag.
+                return Error() << "Unexpected XML parsing event: " << event;
+            case ResXMLParser::BAD_DOCUMENT:
+                return Error() << "Failed to parse XML: " << statusToString(tree.getError());
+            case ResXMLParser::START_NAMESPACE:
+            case ResXMLParser::END_NAMESPACE:
+                // Not of interest, keep going.
+                break;
+            case ResXMLParser::START_TAG:
+                // The first tag in an AndroidManifest.xml should be <manifest> (no namespace).
+                // And that's actually the only tag we care about.
+                if (tree.getElementNamespaceID() >= 0) {
+                    return Error() << "Root element has unexpected namespace.";
+                }
+                size_t nameLength = 0;
+                const char16_t* nameChars = tree.getElementName(&nameLength);
+                if (!nameChars) {
+                    return Error() << "Missing tag name";
+                }
+                if (u16string_view(nameChars, nameLength) != MANIFEST_TAG_NAME) {
+                    return Error() << "Expected <manifest> as root element";
+                }
+                return {};  // Parser is now positioned on <manifest>.
+        }
+    }
+}
+
+// Return an attribute encoded as a string, converted to UTF-8. Note that all
+// attributes are strings in the original XML, but the binary format encodes
+// some as binary numbers etc. This function does not handle converting those
+// encodings back to strings, so should only be used when it is known that a
+// numeric value is not allowed.
+Result<std::string> getStringOnlyAttribute(const ResXMLTree& tree, size_t index) {
+    size_t len = 0;  // Out-parameter; never left indeterminate.
+    const char16_t* value = tree.getAttributeStringValue(index, &len);
+    if (!value) {
+        return Error() << "Expected attribute to have string value";
+    }
+    return Utf16ToUtf8(StringPiece16(value, len));
+}
+
+// Return the u32 value of an attribute.
+Result<uint32_t> getU32Attribute(const ResXMLTree& tree, size_t index) {
+    auto type = tree.getAttributeDataType(index);
+    switch (type) {
+        case Res_value::TYPE_INT_DEC:
+        case Res_value::TYPE_INT_HEX:
+            // This is how we'd expect the version to be encoded - and we don't
+            // care what base it was originally in.
+            return tree.getAttributeData(index);
+        case Res_value::TYPE_STRING: {
+            // If the original string is encoded, then we need to convert it.
+            auto str = OR_RETURN(getStringOnlyAttribute(tree, index));
+            char* str_end = nullptr;
+            // Note that by specifying base 0 we allow for octal, hex, or
+            // decimal representations here.
+            unsigned long value = std::strtoul(str.c_str(), &str_end, 0);
+            if (str_end != str.c_str() + str.size() ||
+                value > std::numeric_limits<uint32_t>::max()) {
+                return Error() << "Invalid numeric value";
+            }
+            return static_cast<uint32_t>(value);
+        }
+        default:
+            return Error() << "Expected numeric value, got type " << type;
+    }
+}
+
+// Parse the binary manifest and extract the information we care about.
+// Everything we're interested in should be an attribute on the <manifest> tag.
+// We don't care what order they come in, absent attributes will be treated as
+// the default value, and any unknown attributes (including ones not in the
+// expected namespace) will be ignored.
+Result<unique_ptr<ApkManifestInfo>> parseManifest(const void* manifest, size_t size) {
+    ResXMLTree tree;
+    auto status = tree.setTo(manifest, size);
+    if (status != OK) {
+        return Error() << "Failed to create XML Tree: " << statusToString(status);
+    }
+
+    OR_RETURN(findManifestElement(tree));
+
+    unique_ptr<ApkManifestInfo> info{new ApkManifestInfo{}};  // {} zeroes the version codes.
+
+    size_t count = tree.getAttributeCount();
+    for (size_t i = 0; i < count; ++i) {
+        size_t len;
+        const char16_t* chars;
+
+        chars = tree.getAttributeNamespace(i, &len);
+        auto namespaceUrl = chars ? u16string_view(chars, len) : u16string_view();
+
+        chars = tree.getAttributeName(i, &len);
+        auto attributeName = chars ? u16string_view(chars, len) : u16string_view();
+
+        if (namespaceUrl.empty()) {  // Un-namespaced attributes: only `package` matters.
+            if (attributeName == PACKAGE_ATTRIBUTE_NAME) {
+                auto result = getStringOnlyAttribute(tree, i);
+                if (!result.ok()) return Error() << "Package name: " << result.error();
+                info->package = *result;
+            }
+        } else if (namespaceUrl == ANDROID_NAMESPACE_URL) {  // android:* attributes.
+            if (attributeName == VERSION_CODE_ATTRIBUTE_NAME) {
+                auto result = getU32Attribute(tree, i);
+                if (!result.ok()) return Error() << "Version code: " << result.error();
+                info->version_code = *result;
+            } else if (attributeName == VERSION_CODE_MAJOR_ATTRIBUTE_NAME) {
+                auto result = getU32Attribute(tree, i);
+                if (!result.ok()) return Error() << "Version code major: " << result.error();
+                info->version_code_major = *result;
+            }
+        }
+    }
+
+    return info;
+}
+} // namespace
+
+const ApkManifestInfo* extractManifestInfo(const void* manifest, size_t size) {
+    auto result = parseManifest(manifest, size);
+    if (!result.ok()) {
+        LOG(ERROR) << "Failed to parse APK manifest: " << result.error().message();
+        return nullptr;
+    }
+    return result->release();  // Ownership passes to the caller; freed by freeManifestInfo.
+}
+
+void freeManifestInfo(const ApkManifestInfo* info) {
+    delete info;  // Deleting a null pointer is a no-op.
+}
+
+const char* getPackageName(const ApkManifestInfo* info) {
+    return info->package.c_str();  // Owned by *info; dangles once info is freed.
+}
+
+uint64_t getVersionCode(const ApkManifestInfo* info) {
+    return info->version_code | (static_cast<uint64_t>(info->version_code_major) << 32);
+}
diff --git a/libs/apkmanifest/native/apkmanifest.hpp b/libs/apkmanifest/native/apkmanifest.hpp
new file mode 100644
index 0000000..352912e
--- /dev/null
+++ b/libs/apkmanifest/native/apkmanifest.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Opaque structure holding information extracted from an APK manifest.
+struct ApkManifestInfo;
+
+extern "C" {
+
+// Parse a binary XML encoded APK manifest and extract relevant information.
+// The caller must free the returned pointer using freeManifestInfo.  Returns
+// null if any error occurs. Does not retain any pointer to the manifest
+// provided.
+const ApkManifestInfo* extractManifestInfo(const void* manifest, size_t size);
+
+// Frees an ApkManifestInfo allocated by extractManifestInfo; this invalidates
+// the pointer and it must not be used again.
+void freeManifestInfo(const ApkManifestInfo* info);
+
+// Given a valid ApkManifestInfo pointer, return the package name of the APK, as
+// a nul-terminated UTF-8 string. The pointer remains valid until the
+// ApkManifestInfo is freed.
+const char* getPackageName(const ApkManifestInfo* info);
+
+// Given a valid ApkManifestInfo pointer, return the version code of the APK.
+uint64_t getVersionCode(const ApkManifestInfo* info);
+}
diff --git a/libs/apkmanifest/src/apkmanifest.rs b/libs/apkmanifest/src/apkmanifest.rs
new file mode 100644
index 0000000..6766b21
--- /dev/null
+++ b/libs/apkmanifest/src/apkmanifest.rs
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! Handle parsing of APK manifest files.
+//! The manifest file is written as XML text, but is stored in the APK
+//! as Android binary compressed XML. This library is a wrapper around
+//! a thin C++ wrapper around libandroidfw, which contains the same
+//! parsing code as used by package manager and aapt2 (amongst other
+//! things).
+
+use anyhow::{bail, Context, Result};
+use apkmanifest_bindgen::{extractManifestInfo, freeManifestInfo, getPackageName, getVersionCode};
+use std::ffi::CStr;
+use std::fs::File;
+use std::path::Path;
+
+/// Information extracted from the Android manifest inside an APK.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ApkManifestInfo {
+    /// The package name of the app.
+    pub package: String,
+    /// The 64-bit version code: versionCodeMajor in the high 32 bits, versionCode in the low.
+    pub version_code: u64,
+}
+
+const ANDROID_MANIFEST: &str = "AndroidManifest.xml";
+
+/// Find the manifest inside the given APK and return information from it.
+/// Fails if the APK cannot be opened, or its manifest cannot be read or parsed.
+pub fn get_manifest_info<P: AsRef<Path>>(apk_path: P) -> Result<ApkManifestInfo> {
+    let apk_path = apk_path.as_ref();
+    let apk = File::open(apk_path).with_context(|| format!("Failed to open {:?}", apk_path))?;
+    let manifest = apkzip::read_file(apk, ANDROID_MANIFEST).context("Failed to read manifest")?;
+
+    // SAFETY: The function only reads the memory range we specify and does not hold
+    // any reference to it.
+    let native_info = unsafe { extractManifestInfo(manifest.as_ptr() as _, manifest.len()) };
+    if native_info.is_null() {
+        bail!("Failed to parse manifest")
+    };
+
+    scopeguard::defer! {
+        // SAFETY: The value we pass is the result of calling extractManifestInfo as required.
+        // The scopeguard ensures we call this exactly once, after we have finished using it.
+        unsafe { freeManifestInfo(native_info); }
+    }
+
+    // SAFETY: It is always safe to call this with a valid native_info, which we have. It returns
+    // a valid nul-terminated C string that lives as long as native_info; we copy it immediately.
+    let package = unsafe { CStr::from_ptr(getPackageName(native_info)) };
+    let package = package.to_str().context("Invalid package name")?.to_string();
+
+    // SAFETY: It is always safe to call this with a valid native_info, which we have.
+    let version_code = unsafe { getVersionCode(native_info) };
+
+    Ok(ApkManifestInfo { package, version_code })
+}