storageproxyd: Add watchdog to log stuck requests
Adds a background watchdog thread to the Trusty storageproxy to monitor
and log any requests that are delayed for too long. This is intended to
assist in recognizing and debugging any sources of hangs related to
servicing Trusty storage requests in Android.
Test: Add sleep() and run trusty storage tests
Bug: 266595872
Change-Id: I98dfaff0dff04f97d5c657ee131e746f63b39739
diff --git a/trusty/storage/proxy/Android.bp b/trusty/storage/proxy/Android.bp
index e952ee0..2e97ee0 100644
--- a/trusty/storage/proxy/Android.bp
+++ b/trusty/storage/proxy/Android.bp
@@ -28,6 +28,7 @@
"rpmb.c",
"storage.c",
"proxy.c",
+ "watchdog.cpp",
],
shared_libs: [
diff --git a/trusty/storage/proxy/proxy.c b/trusty/storage/proxy/proxy.c
index 4f77fa2..c89c5b6 100644
--- a/trusty/storage/proxy/proxy.c
+++ b/trusty/storage/proxy/proxy.c
@@ -31,6 +31,7 @@
#include "log.h"
#include "rpmb.h"
#include "storage.h"
+#include "watchdog.h"
#define REQ_BUFFER_SIZE 4096
static uint8_t req_buffer[REQ_BUFFER_SIZE + 1];
@@ -73,6 +74,8 @@
static int handle_req(struct storage_msg* msg, const void* req, size_t req_len) {
int rc;
+ struct watcher* watcher = watch_start("request", msg);
+
if ((msg->flags & STORAGE_MSG_FLAG_POST_COMMIT) && msg->cmd != STORAGE_RPMB_SEND &&
msg->cmd != STORAGE_FILE_WRITE) {
/*
@@ -81,14 +84,14 @@
*/
ALOGE("cmd 0x%x: post commit option is not implemented\n", msg->cmd);
msg->result = STORAGE_ERR_UNIMPLEMENTED;
- return ipc_respond(msg, NULL, 0);
+ goto err_response;
}
if (msg->flags & STORAGE_MSG_FLAG_PRE_COMMIT) {
- rc = storage_sync_checkpoint();
+ rc = storage_sync_checkpoint(watcher);
if (rc < 0) {
msg->result = STORAGE_ERR_SYNC_FAILURE;
- return ipc_respond(msg, NULL, 0);
+ goto err_response;
}
}
@@ -99,61 +102,65 @@
if (rc != 0) {
ALOGE("is_data_checkpoint_active failed in an unexpected way. Aborting.\n");
msg->result = STORAGE_ERR_GENERIC;
- return ipc_respond(msg, NULL, 0);
+ goto err_response;
} else if (is_checkpoint_active) {
ALOGE("Checkpoint in progress, dropping write ...\n");
msg->result = STORAGE_ERR_GENERIC;
- return ipc_respond(msg, NULL, 0);
+ goto err_response;
}
}
switch (msg->cmd) {
case STORAGE_FILE_DELETE:
- rc = storage_file_delete(msg, req, req_len);
+ rc = storage_file_delete(msg, req, req_len, watcher);
break;
case STORAGE_FILE_OPEN:
- rc = storage_file_open(msg, req, req_len);
+ rc = storage_file_open(msg, req, req_len, watcher);
break;
case STORAGE_FILE_CLOSE:
- rc = storage_file_close(msg, req, req_len);
+ rc = storage_file_close(msg, req, req_len, watcher);
break;
case STORAGE_FILE_WRITE:
- rc = storage_file_write(msg, req, req_len);
+ rc = storage_file_write(msg, req, req_len, watcher);
break;
case STORAGE_FILE_READ:
- rc = storage_file_read(msg, req, req_len);
+ rc = storage_file_read(msg, req, req_len, watcher);
break;
case STORAGE_FILE_GET_SIZE:
- rc = storage_file_get_size(msg, req, req_len);
+ rc = storage_file_get_size(msg, req, req_len, watcher);
break;
case STORAGE_FILE_SET_SIZE:
- rc = storage_file_set_size(msg, req, req_len);
+ rc = storage_file_set_size(msg, req, req_len, watcher);
break;
case STORAGE_FILE_GET_MAX_SIZE:
- rc = storage_file_get_max_size(msg, req, req_len);
+ rc = storage_file_get_max_size(msg, req, req_len, watcher);
break;
case STORAGE_RPMB_SEND:
- rc = rpmb_send(msg, req, req_len);
+ rc = rpmb_send(msg, req, req_len, watcher);
break;
default:
ALOGE("unhandled command 0x%x\n", msg->cmd);
msg->result = STORAGE_ERR_UNIMPLEMENTED;
- rc = 1;
+ goto err_response;
}
- if (rc > 0) {
- /* still need to send response */
- rc = ipc_respond(msg, NULL, 0);
- }
+ /* response was sent in handler */
+ goto finish;
+
+err_response:
+ rc = ipc_respond(msg, NULL, 0);
+
+finish:
+ watch_finish(watcher);
return rc;
}
diff --git a/trusty/storage/proxy/rpmb.c b/trusty/storage/proxy/rpmb.c
index b1b8232..22a85a7 100644
--- a/trusty/storage/proxy/rpmb.c
+++ b/trusty/storage/proxy/rpmb.c
@@ -321,7 +321,8 @@
return SCSI_RES_ERR;
}
-static int send_mmc_rpmb_req(int mmc_fd, const struct storage_rpmb_send_req* req) {
+static int send_mmc_rpmb_req(int mmc_fd, const struct storage_rpmb_send_req* req,
+ struct watcher* watcher) {
union {
struct mmc_ioc_multi_cmd multi;
uint8_t raw[sizeof(struct mmc_ioc_multi_cmd) + sizeof(struct mmc_ioc_cmd) * 3];
@@ -375,14 +376,17 @@
cmd++;
}
+ watch_progress(watcher, "rpmb mmc ioctl");
rc = ioctl(mmc_fd, MMC_IOC_MULTI_CMD, &mmc.multi);
+ watch_progress(watcher, "rpmb mmc ioctl done");
if (rc < 0) {
ALOGE("%s: mmc ioctl failed: %d, %s\n", __func__, rc, strerror(errno));
}
return rc;
}
-static int send_ufs_rpmb_req(int sg_fd, const struct storage_rpmb_send_req* req) {
+static int send_ufs_rpmb_req(int sg_fd, const struct storage_rpmb_send_req* req,
+ struct watcher* watcher) {
int rc;
int wl_rc;
const uint8_t* write_buf = req->payload;
@@ -410,7 +414,9 @@
set_sg_io_hdr(&io_hdr, SG_DXFER_TO_DEV, sizeof(out_cdb), sizeof(sense_buffer),
req->reliable_write_size, (void*)write_buf, (unsigned char*)&out_cdb,
sense_buffer);
+ watch_progress(watcher, "rpmb ufs reliable write");
rc = ioctl(sg_fd, SG_IO, &io_hdr);
+ watch_progress(watcher, "rpmb ufs reliable write done");
if (rc < 0) {
ALOGE("%s: ufs ioctl failed: %d, %s\n", __func__, rc, strerror(errno));
goto err_op;
@@ -435,7 +441,9 @@
set_sg_io_hdr(&io_hdr, SG_DXFER_TO_DEV, sizeof(out_cdb), sizeof(sense_buffer),
req->write_size, (void*)write_buf, (unsigned char*)&out_cdb,
sense_buffer);
+ watch_progress(watcher, "rpmb ufs write");
rc = ioctl(sg_fd, SG_IO, &io_hdr);
+ watch_progress(watcher, "rpmb ufs write done");
if (rc < 0) {
ALOGE("%s: ufs ioctl failed: %d, %s\n", __func__, rc, strerror(errno));
goto err_op;
@@ -450,7 +458,9 @@
sg_io_hdr_t io_hdr;
set_sg_io_hdr(&io_hdr, SG_DXFER_FROM_DEV, sizeof(in_cdb), sizeof(sense_buffer),
req->read_size, read_buf, (unsigned char*)&in_cdb, sense_buffer);
+ watch_progress(watcher, "rpmb ufs read");
rc = ioctl(sg_fd, SG_IO, &io_hdr);
+ watch_progress(watcher, "rpmb ufs read done");
if (rc < 0) {
ALOGE("%s: ufs ioctl failed: %d, %s\n", __func__, rc, strerror(errno));
}
@@ -487,7 +497,7 @@
return rc;
}
-int rpmb_send(struct storage_msg* msg, const void* r, size_t req_len) {
+int rpmb_send(struct storage_msg* msg, const void* r, size_t req_len, struct watcher* watcher) {
int rc;
const struct storage_rpmb_send_req* req = r;
@@ -523,13 +533,13 @@
}
if (dev_type == MMC_RPMB) {
- rc = send_mmc_rpmb_req(rpmb_fd, req);
+ rc = send_mmc_rpmb_req(rpmb_fd, req, watcher);
if (rc < 0) {
msg->result = STORAGE_ERR_GENERIC;
goto err_response;
}
} else if (dev_type == UFS_RPMB) {
- rc = send_ufs_rpmb_req(rpmb_fd, req);
+ rc = send_ufs_rpmb_req(rpmb_fd, req, watcher);
if (rc < 0) {
ALOGE("send_ufs_rpmb_req failed: %d, %s\n", rc, strerror(errno));
msg->result = STORAGE_ERR_GENERIC;
diff --git a/trusty/storage/proxy/rpmb.h b/trusty/storage/proxy/rpmb.h
index f4e1b51..04bdf9a 100644
--- a/trusty/storage/proxy/rpmb.h
+++ b/trusty/storage/proxy/rpmb.h
@@ -18,8 +18,10 @@
#include <stdint.h>
#include <trusty/interface/storage.h>
+#include "watchdog.h"
+
enum dev_type { UNKNOWN_RPMB, MMC_RPMB, VIRT_RPMB, UFS_RPMB, SOCK_RPMB };
int rpmb_open(const char* rpmb_devname, enum dev_type dev_type);
-int rpmb_send(struct storage_msg* msg, const void* r, size_t req_len);
+int rpmb_send(struct storage_msg* msg, const void* r, size_t req_len, struct watcher* watcher);
void rpmb_close(void);
diff --git a/trusty/storage/proxy/storage.c b/trusty/storage/proxy/storage.c
index a96ddcb..2299481 100644
--- a/trusty/storage/proxy/storage.c
+++ b/trusty/storage/proxy/storage.c
@@ -31,6 +31,7 @@
#include "ipc.h"
#include "log.h"
#include "storage.h"
+#include "watchdog.h"
#define FD_TBL_SIZE 64
#define MAX_READ_SIZE 4096
@@ -180,9 +181,8 @@
return rcnt;
}
-int storage_file_delete(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_delete(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
char *path = NULL;
const struct storage_file_delete_req *req = r;
@@ -208,6 +208,7 @@
goto err_response;
}
+ watch_progress(watcher, "unlinking file");
rc = unlink(path);
if (rc < 0) {
rc = errno;
@@ -231,8 +232,9 @@
return ipc_respond(msg, NULL, 0);
}
-static void sync_parent(const char* path) {
+static void sync_parent(const char* path, struct watcher* watcher) {
int parent_fd;
+ watch_progress(watcher, "syncing parent");
char* parent_path = dirname(path);
parent_fd = TEMP_FAILURE_RETRY(open(parent_path, O_RDONLY));
if (parent_fd >= 0) {
@@ -242,9 +244,11 @@
ALOGE("%s: failed to open parent directory \"%s\" for sync: %s\n", __func__, parent_path,
strerror(errno));
}
+ watch_progress(watcher, "done syncing parent");
}
-int storage_file_open(struct storage_msg* msg, const void* r, size_t req_len) {
+int storage_file_open(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
char* path = NULL;
const struct storage_file_open_req *req = r;
struct storage_file_open_resp resp = {0};
@@ -306,7 +310,7 @@
char* parent_path = dirname(path);
rc = mkdir(parent_path, S_IRWXU);
if (rc == 0) {
- sync_parent(parent_path);
+ sync_parent(parent_path, watcher);
} else if (errno != EEXIST) {
ALOGE("%s: Could not create parent directory \"%s\": %s\n", __func__, parent_path,
strerror(errno));
@@ -347,7 +351,7 @@
}
if (open_flags & O_CREAT) {
- sync_parent(path);
+ sync_parent(path, watcher);
}
free(path);
@@ -375,9 +379,8 @@
return ipc_respond(msg, NULL, 0);
}
-int storage_file_close(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_close(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
const struct storage_file_close_req *req = r;
if (req_len != sizeof(*req)) {
@@ -390,7 +393,9 @@
int fd = remove_fd(req->handle);
ALOGV("%s: handle = %u: fd = %u\n", __func__, req->handle, fd);
+ watch_progress(watcher, "fsyncing before file close");
int rc = fsync(fd);
+ watch_progress(watcher, "done fsyncing before file close");
if (rc < 0) {
rc = errno;
ALOGE("%s: fsync failed for fd=%u: %s\n",
@@ -414,10 +419,8 @@
return ipc_respond(msg, NULL, 0);
}
-
-int storage_file_write(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_write(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
int rc;
const struct storage_file_write_req *req = r;
@@ -429,17 +432,20 @@
}
int fd = lookup_fd(req->handle, true);
+ watch_progress(watcher, "writing");
if (write_with_retry(fd, &req->data[0], req_len - sizeof(*req),
req->offset) < 0) {
+ watch_progress(watcher, "writing done w/ error");
rc = errno;
ALOGW("%s: error writing file (fd=%d): %s\n",
__func__, fd, strerror(errno));
msg->result = translate_errno(rc);
goto err_response;
}
+ watch_progress(watcher, "writing done");
if (msg->flags & STORAGE_MSG_FLAG_POST_COMMIT) {
- rc = storage_sync_checkpoint();
+ rc = storage_sync_checkpoint(watcher);
if (rc < 0) {
msg->result = STORAGE_ERR_SYNC_FAILURE;
goto err_response;
@@ -452,10 +458,8 @@
return ipc_respond(msg, NULL, 0);
}
-
-int storage_file_read(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_read(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
int rc;
const struct storage_file_read_req *req = r;
@@ -474,8 +478,10 @@
}
int fd = lookup_fd(req->handle, false);
+ watch_progress(watcher, "reading");
ssize_t read_res = read_with_retry(fd, read_rsp.hdr.data, req->size,
(off_t)req->offset);
+ watch_progress(watcher, "reading done");
if (read_res < 0) {
rc = errno;
ALOGW("%s: error reading file (fd=%d): %s\n",
@@ -491,10 +497,8 @@
return ipc_respond(msg, NULL, 0);
}
-
-int storage_file_get_size(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_get_size(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
const struct storage_file_get_size_req *req = r;
struct storage_file_get_size_resp resp = {0};
@@ -507,7 +511,9 @@
struct stat stat;
int fd = lookup_fd(req->handle, false);
+ watch_progress(watcher, "fstat");
int rc = fstat(fd, &stat);
+ watch_progress(watcher, "fstat done");
if (rc < 0) {
rc = errno;
ALOGE("%s: error stat'ing file (fd=%d): %s\n",
@@ -524,10 +530,8 @@
return ipc_respond(msg, NULL, 0);
}
-
-int storage_file_set_size(struct storage_msg *msg,
- const void *r, size_t req_len)
-{
+int storage_file_set_size(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
const struct storage_file_set_size_req *req = r;
if (req_len != sizeof(*req)) {
@@ -538,7 +542,9 @@
}
int fd = lookup_fd(req->handle, true);
+ watch_progress(watcher, "ftruncate");
int rc = TEMP_FAILURE_RETRY(ftruncate(fd, req->size));
+ watch_progress(watcher, "ftruncate done");
if (rc < 0) {
rc = errno;
ALOGE("%s: error truncating file (fd=%d): %s\n",
@@ -553,7 +559,8 @@
return ipc_respond(msg, NULL, 0);
}
-int storage_file_get_max_size(struct storage_msg* msg, const void* r, size_t req_len) {
+int storage_file_get_max_size(struct storage_msg* msg, const void* r, size_t req_len,
+ struct watcher* watcher) {
const struct storage_file_get_max_size_req* req = r;
struct storage_file_get_max_size_resp resp = {0};
uint64_t max_size = 0;
@@ -566,7 +573,9 @@
struct stat stat;
int fd = lookup_fd(req->handle, false);
+ watch_progress(watcher, "fstat to get max size");
int rc = fstat(fd, &stat);
+ watch_progress(watcher, "fstat to get max size done");
if (rc < 0) {
ALOGE("%s: error stat'ing file (fd=%d): %s\n", __func__, fd, strerror(errno));
goto err_response;
@@ -606,10 +615,10 @@
return 0;
}
-int storage_sync_checkpoint(void)
-{
+int storage_sync_checkpoint(struct watcher* watcher) {
int rc;
+ watch_progress(watcher, "sync fd table");
/* sync fd table and reset it to clean state first */
for (uint fd = 0; fd < FD_TBL_SIZE; fd++) {
if (fd_state[fd] == SS_DIRTY) {
@@ -634,10 +643,12 @@
* because our fd table is large enough to handle the few open files we
* use.
*/
- sync();
- fs_state = SS_CLEAN;
+ watch_progress(watcher, "all fs sync");
+ sync();
+ fs_state = SS_CLEAN;
}
+ watch_progress(watcher, "done syncing");
+
return 0;
}
-
diff --git a/trusty/storage/proxy/storage.h b/trusty/storage/proxy/storage.h
index 77bfa13..f29fdf2 100644
--- a/trusty/storage/proxy/storage.h
+++ b/trusty/storage/proxy/storage.h
@@ -18,30 +18,33 @@
#include <stdint.h>
#include <trusty/interface/storage.h>
-int storage_file_delete(struct storage_msg *msg,
- const void *req, size_t req_len);
+/* Defined in watchdog.h */
+struct watcher;
-int storage_file_open(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_delete(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_close(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_open(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_write(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_close(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_read(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_write(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_get_size(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_read(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_set_size(struct storage_msg *msg,
- const void *req, size_t req_len);
+int storage_file_get_size(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_file_get_max_size(struct storage_msg* msg, const void* req, size_t req_len);
+int storage_file_set_size(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_init(const char *dirname);
+int storage_file_get_max_size(struct storage_msg* msg, const void* req, size_t req_len,
+ struct watcher* watcher);
-int storage_sync_checkpoint(void);
+int storage_init(const char* dirname);
+int storage_sync_checkpoint(struct watcher* watcher);
diff --git a/trusty/storage/proxy/watchdog.cpp b/trusty/storage/proxy/watchdog.cpp
new file mode 100644
index 0000000..6c09e26
--- /dev/null
+++ b/trusty/storage/proxy/watchdog.cpp
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "watchdog.h"
+
+#include <chrono>
+#include <cstdint>
+#include <optional>
+#include <thread>
+#include <vector>
+
+#include <android-base/logging.h>
+
+struct watcher {
+ watcher(const char* id, const struct storage_msg* request);
+ void SetState(const char* new_state);
+ void LogTimeout();
+ void LogFinished();
+
+ const char* id_;
+ uint32_t cmd_;
+ uint32_t op_id_;
+ uint32_t flags_;
+ const char* state_;
+
+ using clock = std::chrono::high_resolution_clock;
+ clock::time_point start_;
+ clock::time_point state_change_;
+ std::chrono::milliseconds Elapsed(clock::time_point end);
+
+ bool triggered_;
+};
+
+watcher::watcher(const char* id, const struct storage_msg* request)
+ : id_(id), state_(nullptr), triggered_(false) {
+ cmd_ = request->cmd;
+ op_id_ = request->op_id;
+ flags_ = request->flags;
+
+ start_ = clock::now();
+ state_change_ = start_;
+}
+
+void watcher::SetState(const char* new_state) {
+ state_ = new_state;
+ state_change_ = clock::now();
+}
+
+void watcher::LogTimeout() {
+ if (!triggered_) {
+ triggered_ = true;
+ LOG(ERROR) << "Storageproxyd watchdog triggered: " << id_ << " cmd: " << cmd_
+ << " op_id: " << op_id_ << " flags: " << flags_;
+ }
+ if (state_) {
+ LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms (" << state_ << " "
+ << Elapsed(state_change_).count() << "ms)";
+ } else {
+ LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms";
+ }
+}
+
+void watcher::LogFinished() {
+ if (triggered_) {
+ LOG(ERROR) << "...completed: " << Elapsed(clock::now()).count() << "ms";
+ }
+}
+
+std::chrono::milliseconds watcher::Elapsed(watcher::clock::time_point end) {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(end - start_);
+}
+
+namespace {
+
+class Watchdog {
+ private:
+ static constexpr std::chrono::milliseconds kDefaultTimeoutMs = std::chrono::milliseconds(500);
+ static constexpr std::chrono::milliseconds kMaxTimeoutMs = std::chrono::seconds(10);
+
+ public:
+ Watchdog() : watcher_(), done_(false) {}
+ ~Watchdog();
+ struct watcher* RegisterWatch(const char* id, const struct storage_msg* request);
+ void AddProgress(struct watcher* watcher, const char* state);
+ void UnRegisterWatch(struct watcher* watcher);
+
+ private:
+ // Syncronizes access to watcher_ and watcher_change_ between the main
+ // thread and watchdog loop thread. watcher_ may only be modified by the
+ // main thread; the watchdog loop is read-only.
+ std::mutex watcher_mutex_;
+ std::unique_ptr<struct watcher> watcher_;
+ std::condition_variable watcher_change_;
+
+ std::thread watchdog_thread_;
+ bool done_;
+
+ void WatchdogLoop();
+ void LogWatchdogTriggerLocked();
+};
+
+Watchdog gWatchdog;
+
+} // Anonymous namespace
+
+// Assumes that caller is single-threaded. If we want to use this from a
+// multi-threaded context we need to ensure that the watchdog thread is
+// initialized safely once and accessing an existing watcher is done while the
+// watcher lock is held.
+struct watcher* Watchdog::RegisterWatch(const char* id, const struct storage_msg* request) {
+ if (!watchdog_thread_.joinable()) {
+ watchdog_thread_ = std::thread(&Watchdog::WatchdogLoop, this);
+ }
+ if (watcher_) {
+ LOG(ERROR) << "Replacing registered watcher " << watcher_->id_;
+ UnRegisterWatch(watcher_.get());
+ }
+
+ struct watcher* ret = nullptr;
+ {
+ std::unique_lock<std::mutex> watcherLock(watcher_mutex_);
+ watcher_ = std::make_unique<struct watcher>(id, request);
+ ret = watcher_.get();
+ }
+ watcher_change_.notify_one();
+ return ret;
+}
+
+void Watchdog::UnRegisterWatch(struct watcher* watcher) {
+ {
+ std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
+ if (!watcher_) {
+ LOG(ERROR) << "Cannot unregister watcher, no watcher registered";
+ return;
+ }
+ if (watcher_.get() != watcher) {
+ LOG(ERROR) << "Unregistering watcher that doesn't match current watcher";
+ }
+ watcher_->LogFinished();
+ watcher_.reset(nullptr);
+ }
+ watcher_change_.notify_one();
+}
+
+void Watchdog::AddProgress(struct watcher* watcher, const char* state) {
+ std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
+ if (watcher_.get() != watcher) {
+ LOG(ERROR) << "Watcher was not registered, cannot log progress: " << state;
+ return;
+ }
+ watcher->SetState(state);
+}
+
+void Watchdog::WatchdogLoop() {
+ std::unique_lock<std::mutex> lock(watcher_mutex_);
+ std::chrono::milliseconds timeout = kDefaultTimeoutMs;
+
+ while (!done_) {
+ // wait for a watch to be registered
+ watcher_change_.wait(lock, [this] { return !!watcher_; });
+
+ // wait for the timeout or unregistration
+ timeout = kDefaultTimeoutMs;
+ do {
+ if (!watcher_change_.wait_for(lock, timeout, [this] { return !watcher_; })) {
+ watcher_->LogTimeout();
+ timeout = std::min(timeout * 2, kMaxTimeoutMs);
+ }
+ } while (!!watcher_);
+ }
+}
+
+Watchdog::~Watchdog() {
+ {
+ std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
+ watcher_.reset(nullptr);
+ done_ = true;
+ }
+ watcher_change_.notify_one();
+ if (watchdog_thread_.joinable()) {
+ watchdog_thread_.join();
+ }
+}
+
+struct watcher* watch_start(const char* id, const struct storage_msg* request) {
+ return gWatchdog.RegisterWatch(id, request);
+}
+
+void watch_progress(struct watcher* watcher, const char* state) {
+ gWatchdog.AddProgress(watcher, state);
+}
+
+void watch_finish(struct watcher* watcher) {
+ gWatchdog.UnRegisterWatch(watcher);
+}
diff --git a/trusty/storage/proxy/watchdog.h b/trusty/storage/proxy/watchdog.h
new file mode 100644
index 0000000..9162fcb
--- /dev/null
+++ b/trusty/storage/proxy/watchdog.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "storage.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct watcher;
+
+/**
+ * watch_start() - Create a watcher for a storage request
+ * @id: Identifier string to distinguish watchers
+ * @request: Incoming request from Trusty storage service
+ *
+ * Create a watcher that will start logging if not finished before a timeout.
+ * Only one watcher may be active at a time, and this function may only be
+ * called from a single thread.
+ */
+struct watcher* watch_start(const char* id, const struct storage_msg* request);
+
+/**
+ * watch_progress() - Note progress on servicing the current request
+ * @watcher: Current watcher, created by watch()
+ *
+ * Sets the current progress state of the watcher, to allow for more granular
+ * reporting of what exactly is stuck if the timeout is reached.
+ */
+void watch_progress(struct watcher* watcher, const char* state);
+
+/**
+ * watch_finish() - Finish watching and unregister the watch
+ * @watcher: Current watcher, created by watch(). Takes ownership of this pointer.
+ *
+ * Finish the current watch task. This function takes ownership of the watcher
+ * and destroys it, so @watcher must not be used again after calling this
+ * function.
+ */
+void watch_finish(struct watcher* watcher);
+
+#ifdef __cplusplus
+}
+#endif