liblog: retry -ENOTCONN | -ECONNREFUSED | -ENOENT indefinitely
Deal with recovering after transitory failures surrounding logd
crash or recovery. Improve the chances that the logging functions
can work in a signal handler, not officially supported, but making
sure logging is not blamed for system lockups when misused.
Reorder gTests so that setuid(AID_SYSTEM) is performed after
liblog.enoent test, and that this occurs after other tests that
like to see buffers with content in them as we stop logd.
Test: gTest liblog-unit-tests --gtest_filter=liblog.enoent
Bug: 33755074
Change-Id: I66f88599534614b7b61da6b2ae5fe099ebaced3a
diff --git a/liblog/logd_writer.c b/liblog/logd_writer.c
index 8fdfb92..2bab92e 100644
--- a/liblog/logd_writer.c
+++ b/liblog/logd_writer.c
@@ -50,7 +50,7 @@
LIBLOG_HIDDEN struct android_log_transport_write logdLoggerWrite = {
.node = { &logdLoggerWrite.node, &logdLoggerWrite.node },
- .context.sock = -1,
+ .context.sock = -EBADF,
.name = "logd",
.available = logdAvailable,
.open = logdOpen,
@@ -65,8 +65,10 @@
i = atomic_load(&logdLoggerWrite.context.sock);
if (i < 0) {
- i = TEMP_FAILURE_RETRY(socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0));
- if (i < 0) {
+ int sock = TEMP_FAILURE_RETRY(socket(PF_UNIX, SOCK_DGRAM |
+ SOCK_CLOEXEC |
+ SOCK_NONBLOCK, 0));
+ if (sock < 0) {
ret = -errno;
} else {
struct sockaddr_un un;
@@ -74,13 +76,22 @@
un.sun_family = AF_UNIX;
strcpy(un.sun_path, "/dev/socket/logdw");
- if (TEMP_FAILURE_RETRY(connect(i, (struct sockaddr *)&un,
+ if (TEMP_FAILURE_RETRY(connect(sock, (struct sockaddr *)&un,
sizeof(struct sockaddr_un))) < 0) {
ret = -errno;
- close(i);
+ switch (ret) {
+ case -ENOTCONN:
+ case -ECONNREFUSED:
+ case -ENOENT:
+ i = atomic_exchange(&logdLoggerWrite.context.sock, ret);
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ close(sock);
} else {
- ret = atomic_exchange(&logdLoggerWrite.context.sock, i);
- if ((ret >= 0) && (ret != i)) {
+ ret = atomic_exchange(&logdLoggerWrite.context.sock, sock);
+ if ((ret >= 0) && (ret != sock)) {
close(ret);
}
ret = 0;
@@ -91,14 +102,19 @@
return ret;
}
-static void logdClose()
+static void __logdClose(int negative_errno)
{
- int sock = atomic_exchange(&logdLoggerWrite.context.sock, -1);
+ int sock = atomic_exchange(&logdLoggerWrite.context.sock, negative_errno);
if (sock >= 0) {
close(sock);
}
}
+static void logdClose()
+{
+ __logdClose(-EBADF);
+}
+
static int logdAvailable(log_id_t logId)
{
if (logId > LOG_ID_SECURITY) {
@@ -117,6 +133,7 @@
struct iovec *vec, size_t nr)
{
ssize_t ret;
+ int sock;
static const unsigned headerLength = 1;
struct iovec newVec[nr + headerLength];
android_log_header_t header;
@@ -124,7 +141,13 @@
static atomic_int_fast32_t dropped;
static atomic_int_fast32_t droppedSecurity;
- if (atomic_load(&logdLoggerWrite.context.sock) < 0) {
+ sock = atomic_load(&logdLoggerWrite.context.sock);
+ if (sock < 0) switch (sock) {
+ case -ENOTCONN:
+ case -ECONNREFUSED:
+ case -ENOENT:
+ break;
+ default:
return -EBADF;
}
@@ -163,7 +186,7 @@
newVec[0].iov_base = (unsigned char *)&header;
newVec[0].iov_len = sizeof(header);
- if (atomic_load(&logdLoggerWrite.context.sock) > 0) {
+ if (sock >= 0) {
int32_t snapshot = atomic_exchange_explicit(&droppedSecurity, 0,
memory_order_relaxed);
if (snapshot) {
@@ -177,8 +200,7 @@
newVec[headerLength].iov_base = &buffer;
newVec[headerLength].iov_len = sizeof(buffer);
- ret = TEMP_FAILURE_RETRY(writev(
- atomic_load(&logdLoggerWrite.context.sock), newVec, 2));
+ ret = TEMP_FAILURE_RETRY(writev(sock, newVec, 2));
if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
atomic_fetch_add_explicit(&droppedSecurity, snapshot,
memory_order_relaxed);
@@ -186,7 +208,8 @@
}
snapshot = atomic_exchange_explicit(&dropped, 0, memory_order_relaxed);
if (snapshot && __android_log_is_loggable_len(ANDROID_LOG_INFO,
- "liblog", strlen("liblog"),
+ "liblog",
+ strlen("liblog"),
ANDROID_LOG_VERBOSE)) {
android_log_event_int_t buffer;
@@ -198,8 +221,7 @@
newVec[headerLength].iov_base = &buffer;
newVec[headerLength].iov_len = sizeof(buffer);
- ret = TEMP_FAILURE_RETRY(writev(
- atomic_load(&logdLoggerWrite.context.sock), newVec, 2));
+ ret = TEMP_FAILURE_RETRY(writev(sock, newVec, 2));
if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
atomic_fetch_add_explicit(&dropped, snapshot,
memory_order_relaxed);
@@ -225,30 +247,43 @@
/*
* The write below could be lost, but will never block.
*
- * ENOTCONN occurs if logd dies.
+ * ENOTCONN occurs if logd has died.
+ * ENOENT occurs if logd is not running and socket is missing.
+ * ECONNREFUSED occurs if we can not reconnect to logd.
* EAGAIN occurs if logd is overloaded.
*/
- ret = TEMP_FAILURE_RETRY(writev(
- atomic_load(&logdLoggerWrite.context.sock), newVec, i));
- if (ret < 0) {
- ret = -errno;
- if (ret == -ENOTCONN) {
- __android_log_lock();
- logdClose();
- ret = logdOpen();
- __android_log_unlock();
-
- if (ret < 0) {
- return ret;
- }
-
- ret = TEMP_FAILURE_RETRY(writev(
- atomic_load(&logdLoggerWrite.context.sock), newVec, i));
- if (ret < 0) {
- ret = -errno;
- }
+ if (sock < 0) {
+ ret = sock;
+ } else {
+ ret = TEMP_FAILURE_RETRY(writev(sock, newVec, i));
+ if (ret < 0) {
+ ret = -errno;
}
}
+ switch(ret) {
+ case -ENOTCONN:
+ case -ECONNREFUSED:
+ case -ENOENT:
+ if (__android_log_trylock()) {
+ return ret; /* in a signal handler? try again when less stressed */
+ }
+ __logdClose(ret);
+ ret = logdOpen();
+ __android_log_unlock();
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = TEMP_FAILURE_RETRY(writev(
+ atomic_load(&logdLoggerWrite.context.sock), newVec, i));
+ if (ret < 0) {
+ ret = -errno;
+ }
+ /* FALLTHRU */
+ default:
+ break;
+ }
if (ret > (ssize_t)sizeof(header)) {
ret -= sizeof(header);