trusty: Helper library for metrics

Bug: 182489121
Test: libtrusty_metrics_test
Change-Id: I7fc45ea3319185a8ac55889e56bd484d4774e928
diff --git a/trusty/metrics/Android.bp b/trusty/metrics/Android.bp
new file mode 100644
index 0000000..e0533bc
--- /dev/null
+++ b/trusty/metrics/Android.bp
@@ -0,0 +1,51 @@
+// Copyright (C) 2021 The Android Open-Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+cc_library {
+    name: "libtrusty_metrics",
+    vendor: true,
+    srcs: [
+        "metrics.cpp",
+    ],
+    export_include_dirs: [
+        "include",
+    ],
+    shared_libs: [
+        "libbase",
+        "liblog",
+        "libtrusty",
+    ],
+}
+
+cc_test {
+    name: "libtrusty_metrics_test",
+    vendor: true,
+    srcs: [
+        "metrics_test.cpp",
+    ],
+    static_libs: [
+        "libtrusty_metrics",
+    ],
+    shared_libs: [
+        "libbase",
+        "libbinder",
+        "liblog",
+        "libtrusty",
+    ],
+    require_root: true,
+}
diff --git a/trusty/metrics/include/trusty/metrics/metrics.h b/trusty/metrics/include/trusty/metrics/metrics.h
new file mode 100644
index 0000000..6949e9b
--- /dev/null
+++ b/trusty/metrics/include/trusty/metrics/metrics.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include <android-base/result.h>
+#include <android-base/unique_fd.h>
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::Result;
+using android::base::unique_fd;
+
+class TrustyMetrics {
+  public:
+    /* Wait for next event with a given timeout. Negative timeout means infinite timeout. */
+    Result<void> WaitForEvent(int timeout_ms = -1);
+    /* Attempt to handle an event from Metrics TA in a non-blocking manner. */
+    Result<void> HandleEvent();
+    /* Expose TIPC channel so that client can integrate it into an event loop with other fds. */
+    int GetRawFd() { return metrics_fd_; };
+
+  protected:
+    TrustyMetrics(std::string tipc_dev) : tipc_dev_(std::move(tipc_dev)), metrics_fd_(-1) {}
+    virtual ~TrustyMetrics(){};
+
+    Result<void> Open();
+    virtual void HandleCrash(const std::string& app_id) = 0;
+    virtual void HandleEventDrop() = 0;
+
+  private:
+    std::string tipc_dev_;
+    unique_fd metrics_fd_;
+};
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android
diff --git a/trusty/metrics/include/trusty/metrics/tipc.h b/trusty/metrics/include/trusty/metrics/tipc.h
new file mode 100644
index 0000000..66d0876
--- /dev/null
+++ b/trusty/metrics/include/trusty/metrics/tipc.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2021, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/**
+ * DOC: Metrics
+ *
+ * Metrics interface provides a way for Android to get Trusty metrics data.
+ *
+ * Currently, only "push" model is supported. Clients are expected to connect to
+ * metrics service, listen for events, e.g. app crash events, and respond to
+ * every event with a &struct metrics_req.
+ *
+ * Communication is driven by metrics service, i.e. requests/responses are all
+ * sent from/to metrics service.
+ *
+ * Note that the type of the event is not known to the client ahead of time.
+ *
+ * In the future, if we need to have Android "pull" metrics data from Trusty,
+ * that can be done by introducing a separate port.
+ *
+ * This interface is shared between Android and Trusty. There is a copy in each
+ * repository. They must be kept in sync.
+ */
+
+#define METRICS_PORT "com.android.trusty.metrics"
+
+/**
+ * enum metrics_cmd - command identifiers for metrics interface
+ * @METRICS_CMD_RESP_BIT:          message is a response
+ * @METRICS_CMD_REQ_SHIFT:         number of bits used by @METRICS_CMD_RESP_BIT
+ * @METRICS_CMD_REPORT_EVENT_DROP: report gaps in the event stream
+ * @METRICS_CMD_REPORT_CRASH:      report an app crash event
+ */
+enum metrics_cmd {
+    METRICS_CMD_RESP_BIT = 1,
+    METRICS_CMD_REQ_SHIFT = 1,
+
+    METRICS_CMD_REPORT_EVENT_DROP = (1 << METRICS_CMD_REQ_SHIFT),
+    METRICS_CMD_REPORT_CRASH = (2 << METRICS_CMD_REQ_SHIFT),
+};
+
+/**
+ * enum metrics_error - metrics error codes
+ * @METRICS_NO_ERROR:        no error
+ * @METRICS_ERR_UNKNOWN_CMD: unknown or not implemented command
+ */
+enum metrics_error {
+    METRICS_NO_ERROR = 0,
+    METRICS_ERR_UNKNOWN_CMD = 1,
+};
+
+/**
+ * struct metrics_req - common structure for metrics requests
+ * @cmd:      command identifier - one of &enum metrics_cmd
+ * @reserved: must be 0
+ */
+struct metrics_req {
+    uint32_t cmd;
+    uint32_t reserved;
+} __attribute__((__packed__));
+
+/**
+ * struct metrics_resp - common structure for metrics responses
+ * @cmd: command identifier - %METRICS_CMD_RESP_BIT or'ed with a cmd in
+ *                            one of &enum metrics_cmd
+ * @status: response status, one of &enum metrics_error
+ */
+struct metrics_resp {
+    uint32_t cmd;
+    uint32_t status;
+} __attribute__((__packed__));
+
+/**
+ * struct metrics_report_crash_req - arguments of %METRICS_CMD_REPORT_CRASH
+ *                                   requests
+ * @app_id_len: length of app ID that follows this structure
+ */
+struct metrics_report_crash_req {
+    uint32_t app_id_len;
+} __attribute__((__packed__));
+
+#define METRICS_MAX_APP_ID_LEN 256
+
+#define METRICS_MAX_MSG_SIZE                                                \
+    (sizeof(struct metrics_req) + sizeof(struct metrics_report_crash_req) + \
+     METRICS_MAX_APP_ID_LEN)
diff --git a/trusty/metrics/metrics.cpp b/trusty/metrics/metrics.cpp
new file mode 100644
index 0000000..3ac128a
--- /dev/null
+++ b/trusty/metrics/metrics.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2021 The Android Open Sourete Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "metrics"
+
+#include <android-base/logging.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <trusty/metrics/metrics.h>
+#include <trusty/metrics/tipc.h>
+#include <trusty/tipc.h>
+#include <unistd.h>
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::ErrnoError;
+using android::base::Error;
+
+Result<void> TrustyMetrics::Open() {
+    int fd = tipc_connect(tipc_dev_.c_str(), METRICS_PORT);
+    if (fd < 0) {
+        return ErrnoError() << "failed to connect to Trusty metrics TA";
+    }
+
+    int flags = fcntl(fd, F_GETFL, 0);
+    if (flags < 0) {
+        return ErrnoError() << "failed F_GETFL";
+    }
+
+    int rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+    if (rc < 0) {
+        return ErrnoError() << "failed F_SETFL";
+    }
+
+    metrics_fd_.reset(fd);
+    return {};
+}
+
+Result<void> TrustyMetrics::WaitForEvent(int timeout_ms) {
+    if (!metrics_fd_.ok()) {
+        return Error() << "connection to Metrics TA has not been initialized yet";
+    }
+
+    struct pollfd pfd = {
+            .fd = metrics_fd_,
+            .events = POLLIN,
+    };
+
+    int rc = poll(&pfd, 1, timeout_ms);
+    if (rc != 1) {
+        return ErrnoError() << "failed poll()";
+    }
+
+    if (!(pfd.revents & POLLIN)) {
+        return ErrnoError() << "channel not ready";
+    }
+
+    return {};
+}
+
+Result<void> TrustyMetrics::HandleEvent() {
+    if (!metrics_fd_.ok()) {
+        return Error() << "connection to Metrics TA has not been initialized yet";
+    }
+
+    uint8_t msg[METRICS_MAX_MSG_SIZE];
+
+    auto rc = read(metrics_fd_, msg, sizeof(msg));
+    if (rc < 0) {
+        return ErrnoError() << "failed to read metrics message";
+    }
+    size_t msg_len = rc;
+
+    if (msg_len < sizeof(metrics_req)) {
+        return Error() << "message too small: " << rc;
+    }
+    auto req = reinterpret_cast<metrics_req*>(msg);
+    size_t offset = sizeof(metrics_req);
+    uint32_t status = METRICS_NO_ERROR;
+
+    switch (req->cmd) {
+        case METRICS_CMD_REPORT_CRASH: {
+            if (msg_len < offset + sizeof(metrics_report_crash_req)) {
+                return Error() << "message too small: " << rc;
+            }
+            auto crash_args = reinterpret_cast<metrics_report_crash_req*>(msg + offset);
+            offset += sizeof(metrics_report_crash_req);
+
+            if (msg_len < offset + crash_args->app_id_len) {
+                return Error() << "message too small: " << rc;
+            }
+            auto app_id_ptr = reinterpret_cast<char*>(msg + offset);
+            std::string app_id(app_id_ptr, crash_args->app_id_len);
+
+            HandleCrash(app_id);
+            break;
+        }
+
+        case METRICS_CMD_REPORT_EVENT_DROP:
+            HandleEventDrop();
+            break;
+
+        default:
+            status = METRICS_ERR_UNKNOWN_CMD;
+            break;
+    }
+
+    metrics_resp resp = {
+            .cmd = req->cmd | METRICS_CMD_RESP_BIT,
+            .status = status,
+    };
+
+    rc = write(metrics_fd_, &resp, sizeof(resp));
+    if (rc < 0) {
+        return ErrnoError() << "failed to request next metrics event";
+    }
+
+    if (rc != (int)sizeof(resp)) {
+        return Error() << "unexpected number of bytes sent event: " << rc;
+    }
+
+    return {};
+}
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android
diff --git a/trusty/metrics/metrics_test.cpp b/trusty/metrics/metrics_test.cpp
new file mode 100644
index 0000000..407ddf2
--- /dev/null
+++ b/trusty/metrics/metrics_test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <android-base/unique_fd.h>
+#include <binder/IPCThreadState.h>
+#include <gtest/gtest.h>
+#include <poll.h>
+#include <trusty/metrics/metrics.h>
+#include <trusty/tipc.h>
+
+#define TIPC_DEV "/dev/trusty-ipc-dev0"
+#define CRASHER_PORT "com.android.trusty.metrics.test.crasher"
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::unique_fd;
+
+static void TriggerCrash() {
+    size_t num_retries = 3;
+    int fd = -1;
+
+    for (size_t i = 0; i < num_retries; i++) {
+        /* It's possible to time out waiting for crasher TA to restart. */
+        fd = tipc_connect(TIPC_DEV, CRASHER_PORT);
+        if (fd >= 0) {
+            break;
+        }
+    }
+
+    unique_fd crasher(fd);
+    ASSERT_GE(crasher, 0);
+
+    int msg = 0;
+    int rc = write(crasher, &msg, sizeof(msg));
+    ASSERT_EQ(rc, sizeof(msg));
+}
+
+class TrustyMetricsTest : public TrustyMetrics, public ::testing::Test {
+  public:
+    TrustyMetricsTest() : TrustyMetrics(TIPC_DEV) {}
+
+    virtual void HandleCrash(const std::string& app_id) override { crashed_app_ = app_id; }
+
+    virtual void HandleEventDrop() override { event_drop_count_++; }
+
+    virtual void SetUp() override {
+        auto ret = Open();
+        ASSERT_TRUE(ret.ok()) << ret.error();
+    }
+
+    void WaitForAndHandleEvent() {
+        auto ret = WaitForEvent(30000 /* 30 second timeout */);
+        ASSERT_TRUE(ret.ok()) << ret.error();
+
+        ret = HandleEvent();
+        ASSERT_TRUE(ret.ok()) << ret.error();
+    }
+
+    std::string crashed_app_;
+    size_t event_drop_count_;
+};
+
+TEST_F(TrustyMetricsTest, Crash) {
+    TriggerCrash();
+    WaitForAndHandleEvent();
+
+    /* Check that correct TA crashed. */
+    ASSERT_EQ(crashed_app_, "36f5b435-5bd3-4526-8b76-200e3a7e79f3:crasher");
+}
+
+TEST_F(TrustyMetricsTest, PollSet) {
+    int binder_fd;
+    int rc = IPCThreadState::self()->setupPolling(&binder_fd);
+    ASSERT_EQ(rc, 0);
+    ASSERT_GE(binder_fd, 0);
+
+    TriggerCrash();
+
+    struct pollfd pfds[] = {
+            {
+                    .fd = binder_fd,
+                    .events = POLLIN,
+            },
+            {
+                    .fd = GetRawFd(),
+                    .events = POLLIN,
+            },
+    };
+
+    rc = poll(pfds, 2, 30000 /* 30 second timeout */);
+    /* We expect one event on the metrics fd. */
+    ASSERT_EQ(rc, 1);
+    ASSERT_TRUE(pfds[1].revents & POLLIN);
+
+    auto ret = HandleEvent();
+    ASSERT_TRUE(ret.ok()) << ret.error();
+
+    /* Check that correct TA crashed. */
+    ASSERT_EQ(crashed_app_, "36f5b435-5bd3-4526-8b76-200e3a7e79f3:crasher");
+}
+
+TEST_F(TrustyMetricsTest, EventDrop) {
+    /* We know the size of the internal event queue is less than this. */
+    size_t num_events = 3;
+
+    ASSERT_EQ(event_drop_count_, 0);
+
+    for (auto i = 0; i < num_events; i++) {
+        TriggerCrash();
+    }
+
+    for (auto i = 0; i < num_events; i++) {
+        WaitForAndHandleEvent();
+        if (event_drop_count_ > 0) {
+            break;
+        }
+    }
+
+    ASSERT_EQ(event_drop_count_, 1);
+}
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android