liblog: retry -ENOTCONN | -ECONNREFUSED | -ENOENT indefinitely

Recover after transitory failures surrounding a logd crash or
recovery.  Also improve the chances that the logging functions can
work in a signal handler.  Such use is not officially supported, but
we want to make sure logging is not blamed for system lockups when
misused.

Reorder gTests so that setuid(AID_SYSTEM) is performed after the
liblog.enoent test, and so that this in turn occurs after the other
tests that expect to see buffers with content in them, since we stop
logd.

Test: gTest liblog-unit-tests --gtest_filter=liblog.enoent
Bug: 33755074
Change-Id: I66f88599534614b7b61da6b2ae5fe099ebaced3a
diff --git a/liblog/logd_writer.c b/liblog/logd_writer.c
index 8fdfb92..2bab92e 100644
--- a/liblog/logd_writer.c
+++ b/liblog/logd_writer.c
@@ -50,7 +50,7 @@
 
 LIBLOG_HIDDEN struct android_log_transport_write logdLoggerWrite = {
     .node = { &logdLoggerWrite.node, &logdLoggerWrite.node },
-    .context.sock = -1,
+    .context.sock = -EBADF,
     .name = "logd",
     .available = logdAvailable,
     .open = logdOpen,
@@ -65,8 +65,10 @@
 
     i = atomic_load(&logdLoggerWrite.context.sock);
     if (i < 0) {
-        i = TEMP_FAILURE_RETRY(socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0));
-        if (i < 0) {
+        int sock = TEMP_FAILURE_RETRY(socket(PF_UNIX, SOCK_DGRAM |
+                                                      SOCK_CLOEXEC |
+                                                      SOCK_NONBLOCK, 0));
+        if (sock < 0) {
             ret = -errno;
         } else {
             struct sockaddr_un un;
@@ -74,13 +76,22 @@
             un.sun_family = AF_UNIX;
             strcpy(un.sun_path, "/dev/socket/logdw");
 
-            if (TEMP_FAILURE_RETRY(connect(i, (struct sockaddr *)&un,
+            if (TEMP_FAILURE_RETRY(connect(sock, (struct sockaddr *)&un,
                                            sizeof(struct sockaddr_un))) < 0) {
                 ret = -errno;
-                close(i);
+                switch (ret) {
+                case -ENOTCONN:
+                case -ECONNREFUSED:
+                case -ENOENT:
+                    i = atomic_exchange(&logdLoggerWrite.context.sock, ret);
+                    /* FALLTHRU */
+                default:
+                    break;
+                }
+                close(sock);
             } else {
-                ret = atomic_exchange(&logdLoggerWrite.context.sock, i);
-                if ((ret >= 0) && (ret != i)) {
+                ret = atomic_exchange(&logdLoggerWrite.context.sock, sock);
+                if ((ret >= 0) && (ret != sock)) {
                     close(ret);
                 }
                 ret = 0;
@@ -91,14 +102,19 @@
     return ret;
 }
 
-static void logdClose()
+static void __logdClose(int negative_errno)
 {
-    int sock = atomic_exchange(&logdLoggerWrite.context.sock, -1);
+    int sock = atomic_exchange(&logdLoggerWrite.context.sock, negative_errno);
     if (sock >= 0) {
         close(sock);
     }
 }
 
+static void logdClose()
+{
+    __logdClose(-EBADF);
+}
+
 static int logdAvailable(log_id_t logId)
 {
     if (logId > LOG_ID_SECURITY) {
@@ -117,6 +133,7 @@
                      struct iovec *vec, size_t nr)
 {
     ssize_t ret;
+    int sock;
     static const unsigned headerLength = 1;
     struct iovec newVec[nr + headerLength];
     android_log_header_t header;
@@ -124,7 +141,13 @@
     static atomic_int_fast32_t dropped;
     static atomic_int_fast32_t droppedSecurity;
 
-    if (atomic_load(&logdLoggerWrite.context.sock) < 0) {
+    sock = atomic_load(&logdLoggerWrite.context.sock);
+    if (sock < 0) switch (sock) {
+    case -ENOTCONN:
+    case -ECONNREFUSED:
+    case -ENOENT:
+        break;
+    default:
         return -EBADF;
     }
 
@@ -163,7 +186,7 @@
     newVec[0].iov_base = (unsigned char *)&header;
     newVec[0].iov_len  = sizeof(header);
 
-    if (atomic_load(&logdLoggerWrite.context.sock) > 0) {
+    if (sock >= 0) {
         int32_t snapshot = atomic_exchange_explicit(&droppedSecurity, 0,
                                                     memory_order_relaxed);
         if (snapshot) {
@@ -177,8 +200,7 @@
             newVec[headerLength].iov_base = &buffer;
             newVec[headerLength].iov_len  = sizeof(buffer);
 
-            ret = TEMP_FAILURE_RETRY(writev(
-                    atomic_load(&logdLoggerWrite.context.sock), newVec, 2));
+            ret = TEMP_FAILURE_RETRY(writev(sock, newVec, 2));
             if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
                 atomic_fetch_add_explicit(&droppedSecurity, snapshot,
                                           memory_order_relaxed);
@@ -186,7 +208,8 @@
         }
         snapshot = atomic_exchange_explicit(&dropped, 0, memory_order_relaxed);
         if (snapshot && __android_log_is_loggable_len(ANDROID_LOG_INFO,
-                                                      "liblog", strlen("liblog"),
+                                                      "liblog",
+                                                      strlen("liblog"),
                                                       ANDROID_LOG_VERBOSE)) {
             android_log_event_int_t buffer;
 
@@ -198,8 +221,7 @@
             newVec[headerLength].iov_base = &buffer;
             newVec[headerLength].iov_len  = sizeof(buffer);
 
-            ret = TEMP_FAILURE_RETRY(writev(
-                      atomic_load(&logdLoggerWrite.context.sock), newVec, 2));
+            ret = TEMP_FAILURE_RETRY(writev(sock, newVec, 2));
             if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
                 atomic_fetch_add_explicit(&dropped, snapshot,
                                           memory_order_relaxed);
@@ -225,30 +247,43 @@
     /*
      * The write below could be lost, but will never block.
      *
-     * ENOTCONN occurs if logd dies.
+     * ENOTCONN occurs if logd has died.
+     * ENOENT occurs if logd is not running and socket is missing.
+     * ECONNREFUSED occurs if we can not reconnect to logd.
      * EAGAIN occurs if logd is overloaded.
      */
-    ret = TEMP_FAILURE_RETRY(writev(
-            atomic_load(&logdLoggerWrite.context.sock), newVec, i));
-    if (ret < 0) {
-        ret = -errno;
-        if (ret == -ENOTCONN) {
-            __android_log_lock();
-            logdClose();
-            ret = logdOpen();
-            __android_log_unlock();
-
-            if (ret < 0) {
-                return ret;
-            }
-
-            ret = TEMP_FAILURE_RETRY(writev(
-                    atomic_load(&logdLoggerWrite.context.sock), newVec, i));
-            if (ret < 0) {
-                ret = -errno;
-            }
+    if (sock < 0) {
+        ret = sock;
+    } else {
+        ret = TEMP_FAILURE_RETRY(writev(sock, newVec, i));
+        if (ret < 0) {
+            ret = -errno;
         }
     }
+    switch(ret) {
+    case -ENOTCONN:
+    case -ECONNREFUSED:
+    case -ENOENT:
+        if (__android_log_trylock()) {
+            return ret; /* in a signal handler? try again when less stressed */
+        }
+        __logdClose(ret);
+        ret = logdOpen();
+        __android_log_unlock();
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = TEMP_FAILURE_RETRY(writev(
+                atomic_load(&logdLoggerWrite.context.sock), newVec, i));
+        if (ret < 0) {
+            ret = -errno;
+        }
+        /* FALLTHRU */
+    default:
+        break;
+    }
 
     if (ret > (ssize_t)sizeof(header)) {
         ret -= sizeof(header);