logd: SerializedLogBuffer: never wait for a reader during prune/clear
Previously, chatty had logic that would skip a certain number of log
entries to satisfy pruning, but otherwise keep the reader connected.
This was a best-effort attempt at helping pruning, with additional
logic that handled further disruption if logd's memory reached 2x the
allotted amount.
The new logic has two components:
1) memcpy() each individual log message in FlushTo() such that no
reference to the underlying log data outlives the lock. Note that
this memcpy has a negligible performance cost (see the sketch below).
2) In Prune(), immediately delete all log chunks required to reduce
memory to the allotted amount, which is now safe given 1). If readers
will lose logs, a warning is still printed.
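Below is a minimal sketch of the copy-then-flush pattern in 1).
Entry, Writer, kMaxPayload, and FlushOne are simplified stand-ins for
illustration, not the real logd types:

    #include <cstddef>
    #include <cstdio>
    #include <cstring>
    #include <mutex>

    constexpr size_t kMaxPayload = 4068;  // stand-in for LOGGER_ENTRY_MAX_PAYLOAD

    struct Entry {
        size_t msg_len;
        char msg[kMaxPayload];
    };

    struct Writer {
        // Stand-in for flushing one entry to a reader socket.
        bool Write(const Entry& e) { return fwrite(e.msg, 1, e.msg_len, stdout) == e.msg_len; }
    };

    bool FlushOne(std::mutex& lock, const Entry* entry, Writer& writer) {
        Entry copy;
        {
            // Copy to the stack while holding the lock, so no reference to the
            // underlying log chunk survives once the lock is released.
            std::lock_guard guard{lock};
            memcpy(&copy, entry, sizeof(copy));
        }
        // Flush without the lock: a slow reader can no longer block Prune() from
        // deleting the chunk that held the original entry.
        return writer.Write(copy);
    }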
This additionally makes the Clear() logic deterministic. It was
previously best-effort in chatty, but SerializedLogBuffer now
immediately and always clears all logs (see the sketch below).
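For illustration, a rough sketch of the resulting Prune()/Clear()
shape; Chunk, Reader, and Buffer are hypothetical stand-ins, not the
real classes:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <list>
    #include <mutex>
    #include <vector>

    struct Chunk {
        size_t size;           // bytes used by this chunk
        uint64_t highest_seq;  // highest sequence number it contains
    };

    struct Reader {
        uint64_t start;  // first sequence number this reader still needs
        const char* name;
    };

    struct Buffer {
        std::mutex lock;
        std::list<Chunk> chunks;
        std::vector<Reader> readers;

        // Frees at least bytes_to_free, never waiting on a reader: a reader
        // whose start falls inside a deleted chunk loses those logs, and we
        // print a warning instead of blocking.
        void Prune(size_t bytes_to_free) {
            auto it = chunks.begin();
            while (it != chunks.end()) {
                for (const auto& r : readers) {
                    if (r.start <= it->highest_seq) {
                        fprintf(stderr, "Skipping entries from slow reader, %s\n", r.name);
                    }
                }
                size_t freed = it->size;
                it = chunks.erase(it);
                if (freed >= bytes_to_free) return;
                bytes_to_free -= freed;
            }
        }

        // Deterministic: a single unconditional prune, no retry/sleep loop
        // and no reader kicking, always succeeds immediately.
        bool Clear() {
            std::lock_guard guard{lock};
            Prune(std::numeric_limits<size_t>::max());
            return true;
        }
    };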
Bug: 163617910
Test: logging unit tests
Test: Prune() immediately frees buffers during high log pressure
Test: Clear() immediately frees buffers during high log pressure
Change-Id: I40fe9b791312af3dc256b166e5c34425f4ca51ac
diff --git a/logd/SerializedLogBuffer.cpp b/logd/SerializedLogBuffer.cpp
index f4bf37f..5012d3d 100644
--- a/logd/SerializedLogBuffer.cpp
+++ b/logd/SerializedLogBuffer.cpp
@@ -75,6 +75,10 @@
return -EINVAL;
}
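+ // Truncating here keeps msg_len within LOGGER_ENTRY_MAX_PAYLOAD, which the
+ // fixed-size entry_copy buffer in FlushTo() below relies on.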
+ if (len > LOGGER_ENTRY_MAX_PAYLOAD) {
+ len = LOGGER_ENTRY_MAX_PAYLOAD;
+ }
+
if (!ShouldLog(log_id, msg, len)) {
stats_->AddTotal(log_id, len);
return -EACCES;
@@ -135,40 +139,42 @@
}
}
-bool SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) {
- // Don't prune logs that are newer than the point at which any reader threads are reading from.
- LogReaderThread* oldest = nullptr;
+void SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) {
auto reader_threads_lock = std::lock_guard{reader_list_->reader_threads_lock()};
- for (const auto& reader_thread : reader_list_->reader_threads()) {
- if (!reader_thread->IsWatching(log_id)) {
- continue;
- }
- if (reader_thread->deadline().time_since_epoch().count() != 0) {
- // Always wake up wrapped readers when pruning. 'Wrapped' readers are an optimization
- // that allows the reader to wait until logs starting at a specified time stamp are
- // about to be pruned. This is error-prone however, since if that timestamp is about to
- // be pruned, the reader is not likely to read the messages fast enough to not back-up
- // logd. Instead, we can achieve an nearly-as-efficient but not error-prune batching
- // effect by waking the reader whenever any chunk is about to be pruned.
- reader_thread->triggerReader_Locked();
- }
- if (!oldest || oldest->start() > reader_thread->start()) {
- oldest = reader_thread.get();
- }
- }
auto& log_buffer = logs_[log_id];
auto it = log_buffer.begin();
while (it != log_buffer.end()) {
- if (oldest != nullptr && it->highest_sequence_number() >= oldest->start()) {
- break;
+ for (const auto& reader_thread : reader_list_->reader_threads()) {
+ if (!reader_thread->IsWatching(log_id)) {
+ continue;
+ }
+
+ if (reader_thread->deadline().time_since_epoch().count() != 0) {
+ // Always wake up wrapped readers when pruning. 'Wrapped' readers are an
+ // optimization that allows the reader to wait until logs starting at a specified
+ // time stamp are about to be pruned. This is error-prone, however, since if that
+ // timestamp is about to be pruned, the reader is not likely to read the messages
+ // fast enough to keep logd from backing up. Instead, we can achieve a
+ // nearly-as-efficient but not error-prone batching effect by waking the reader
+ // whenever any chunk is about to be pruned.
+ reader_thread->triggerReader_Locked();
+ }
+
+ // Some readers may still be reading from this log chunk; log a warning that
+ // they are about to lose logs.
+ // TODO: We should forcefully disconnect the reader instead, so that the
+ // reader itself has an indication that it has lost logs.
+ if (reader_thread->start() <= it->highest_sequence_number()) {
+ LOG(WARNING) << "Skipping entries from slow reader, " << reader_thread->name()
+ << ", from LogBuffer::Prune()";
+ }
}
// Increment ahead of time since we're going to erase this iterator from the list.
auto it_to_prune = it++;
- // The sequence number check ensures that all readers have read all logs in this chunk, but
- // they may still hold a reference to the chunk to track their last read log_position.
+ // Readers may have a reference to the chunk to track their last read log_position.
// Notify them to delete the reference.
NotifyReadersOfPrune(log_id, it_to_prune);
@@ -183,39 +189,11 @@
RemoveChunkFromStats(log_id, *it_to_prune);
log_buffer.erase(it_to_prune);
if (buffer_size >= bytes_to_free) {
- return true;
+ return;
}
bytes_to_free -= buffer_size;
}
}
-
- // If we've deleted all buffers without bytes_to_free hitting 0, then we're called by Clear()
- // and should return true.
- if (it == log_buffer.end()) {
- return true;
- }
-
- // Otherwise we are stuck due to a reader, so mitigate it.
- CHECK(oldest != nullptr);
- KickReader(oldest, log_id, bytes_to_free);
- return false;
-}
-
-// If the selected reader is blocking our pruning progress, decide on
-// what kind of mitigation is necessary to unblock the situation.
-void SerializedLogBuffer::KickReader(LogReaderThread* reader, log_id_t id, size_t bytes_to_free) {
- if (bytes_to_free >= max_size_[id]) { // +100%
- // A misbehaving or slow reader is dropped if we hit too much memory pressure.
- LOG(WARNING) << "Kicking blocked reader, " << reader->name()
- << ", from LogBuffer::kickMe()";
- reader->release_Locked();
- } else {
- // Tell slow reader to skip entries to catch up.
- unsigned long prune_rows = bytes_to_free / 300;
- LOG(WARNING) << "Skipping " << prune_rows << " entries from slow reader, " << reader->name()
- << ", from LogBuffer::kickMe()";
- reader->triggerSkip_Locked(id, prune_rows);
- }
}
std::unique_ptr<FlushToState> SerializedLogBuffer::CreateFlushToState(uint64_t start,
@@ -256,12 +234,18 @@
}
}
+ // We copy the log entry such that we can flush it without the lock. We never block pruning
+ // waiting for this Flush() to complete.
+ constexpr size_t kMaxEntrySize = sizeof(*entry) + LOGGER_ENTRY_MAX_PAYLOAD + 1;
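+ // __attribute__((uninitialized)) opts entry_copy out of compiler-enforced
+ // stack initialization (e.g. -ftrivial-auto-var-init), since the memcpy()
+ // below overwrites the bytes that are actually read.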
+ unsigned char entry_copy[kMaxEntrySize] __attribute__((uninitialized));
+ CHECK_LT(entry->msg_len(), LOGGER_ENTRY_MAX_PAYLOAD + 1);
+ memcpy(entry_copy, entry, sizeof(*entry) + entry->msg_len());
lock.unlock();
- // We never prune logs equal to or newer than any LogReaderThreads' `start` value, so the
- // `entry` pointer is safe here without the lock
- if (!entry->Flush(writer, log_id)) {
+
+ if (!reinterpret_cast<SerializedLogEntry*>(entry_copy)->Flush(writer, log_id)) {
return false;
}
+
lock.lock();
}
@@ -270,38 +254,11 @@
}
bool SerializedLogBuffer::Clear(log_id_t id, uid_t uid) {
- // Try three times to clear, then disconnect the readers and try one final time.
- for (int retry = 0; retry < 3; ++retry) {
- {
- auto lock = std::lock_guard{lock_};
- bool prune_success = Prune(id, ULONG_MAX, uid);
- if (prune_success) {
- return true;
- }
- }
- sleep(1);
- }
- // Check if it is still busy after the sleep, we try to prune one entry, not another clear run,
- // so we are looking for the quick side effect of the return value to tell us if we have a
- // _blocked_ reader.
- bool busy = false;
- {
- auto lock = std::lock_guard{lock_};
- busy = !Prune(id, 1, uid);
- }
- // It is still busy, disconnect all readers.
- if (busy) {
- auto reader_threads_lock = std::lock_guard{reader_list_->reader_threads_lock()};
- for (const auto& reader_thread : reader_list_->reader_threads()) {
- if (reader_thread->IsWatching(id)) {
- LOG(WARNING) << "Kicking blocked reader, " << reader_thread->name()
- << ", from LogBuffer::clear()";
- reader_thread->release_Locked();
- }
- }
- }
auto lock = std::lock_guard{lock_};
- return Prune(id, ULONG_MAX, uid);
+ Prune(id, ULONG_MAX, uid);
+
+ // Clearing SerializedLogBuffer never waits for readers and therefore is always successful.
+ return true;
}
size_t SerializedLogBuffer::GetSizeUsed(log_id_t id) {