Make system property reads wait-free

Right now, when we read a system property, we first (assuming we've
already looked up the property's prop_info) read the property's serial
number; if we find that the low bit (the dirty bit) in the serial
number is set, we futex-wait for that serial number to become
non-dirty. By doing so, we spare readers from seeing partially-updated
property values if they race with the property service's non-atomic
memcpy to the property value slot. (The futex-wait here isn't
essential to the algorithm: spinning while dirty would suffice,
although it'd be somewhat less efficient.)

The problem with this approach is that readers can wait on the
property service process, potentially causing delays due to scheduling
variance. Property reads are not guaranteed to complete in finite time
right now.

This change makes property reads wait-free and ensures that they
complete in finite time in all cases. In the new approach, we prevent
value tearing by backing up each property we're about to modify and
directing readers to the backup copy if they try to read a property
with the dirty bit set.

(The wait freedom is limited to the case of readers racing against
*one* property update. A writer can still delay readers by rapidly
updating a property --- but after this change, readers can't hang due
to PID 1 scheduling delays.)

I considered adding explicit atomic access to short property values,
but between binary compatibility with the existing property database
and the need to carefully handle transitions of property values
between "short" (compatible with atomics) and "long" (incompatible
with atomics) length domains, I figured the complexity wasn't worth it
and that making property reads wait-free would be adequate.

Test: boots
Bug: 143561649
Change-Id: Ifd3108aedba5a4b157b66af6ca0a4ed084bd5982
diff --git a/libc/bionic/system_property_api.cpp b/libc/bionic/system_property_api.cpp
index 051bc4c..a641f12 100644
--- a/libc/bionic/system_property_api.cpp
+++ b/libc/bionic/system_property_api.cpp
@@ -100,7 +100,13 @@
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 uint32_t __system_property_serial(const prop_info* pi) {
-  return system_properties.Serial(pi);
+  // N.B. a previous version of this function was much heavier-weight
+  // and enforced acquire semantics, so give our load here acquire
+  // semantics just in case somebody depends on
+  // __system_property_serial enforcing memory order, e.g., in case
+  // someone spins on the result of this function changing before
+  // loading some value.
+  return atomic_load_explicit(&pi->serial, memory_order_acquire);
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
diff --git a/libc/system_properties/include/system_properties/prop_area.h b/libc/system_properties/include/system_properties/prop_area.h
index a69f90e..53b2745 100644
--- a/libc/system_properties/include/system_properties/prop_area.h
+++ b/libc/system_properties/include/system_properties/prop_area.h
@@ -106,6 +106,20 @@
     memset(reserved_, 0, sizeof(reserved_));
     // Allocate enough space for the root node.
     bytes_used_ = sizeof(prop_bt);
+    // To make property reads wait-free, we reserve a
+    // PROP_VALUE_MAX-sized block of memory, the "dirty backup area",
+    // just after the root node. When we're about to modify a
+    // property, we copy the old value into the dirty backup area and
+    // copy the new value into the prop_info structure. Before
+    // starting the latter copy, we mark the property's serial as
+    // being dirty. If a reader comes along while we're doing the
+    // property update and sees a dirty serial, the reader copies from
+    // the dirty backup area instead of the property value
+    // proper. After the copy, the reader checks whether the property
+    // serial is the same: if it is, the dirty backup area hasn't been
+    // reused for something else and we can complete the
+    // read immediately.
+    bytes_used_ +=  __BIONIC_ALIGN(PROP_VALUE_MAX, sizeof(uint_least32_t));
   }
 
   const prop_info* find(const char* name);
@@ -122,6 +136,9 @@
   uint32_t version() const {
     return version_;
   }
+  char* dirty_backup_area() {
+    return data_ + sizeof (prop_bt);
+  }
 
  private:
   static prop_area* map_fd_ro(const int fd);
diff --git a/libc/system_properties/include/system_properties/system_properties.h b/libc/system_properties/include/system_properties/system_properties.h
index cad29cc..0666e28 100644
--- a/libc/system_properties/include/system_properties/system_properties.h
+++ b/libc/system_properties/include/system_properties/system_properties.h
@@ -66,7 +66,6 @@
   int Get(const char* name, char* value);
   int Update(prop_info* pi, const char* value, unsigned int len);
   int Add(const char* name, unsigned int namelen, const char* value, unsigned int valuelen);
-  uint32_t Serial(const prop_info* pi);
   uint32_t WaitAny(uint32_t old_serial);
   bool Wait(const prop_info* pi, uint32_t old_serial, uint32_t* new_serial_ptr,
             const timespec* relative_timeout);
@@ -74,6 +73,8 @@
   int Foreach(void (*propfn)(const prop_info* pi, void* cookie), void* cookie);
 
  private:
+  uint32_t ReadMutablePropertyValue(const prop_info* pi, char* value);
+
   // We don't want to use new or malloc in properties (b/31659220), and we don't want to waste a
   // full page by using mmap(), so we set aside enough space to create any context of the three
   // contexts.
diff --git a/libc/system_properties/system_properties.cpp b/libc/system_properties/system_properties.cpp
index d7c441b..8404778 100644
--- a/libc/system_properties/system_properties.cpp
+++ b/libc/system_properties/system_properties.cpp
@@ -140,42 +140,58 @@
   return strncmp(name, "ro.", 3) == 0;
 }
 
-int SystemProperties::Read(const prop_info* pi, char* name, char* value) {
-  while (true) {
-    uint32_t serial = Serial(pi);  // acquire semantics
-    size_t len = SERIAL_VALUE_LEN(serial);
-    memcpy(value, pi->value, len + 1);
-    // TODO: Fix the synchronization scheme here.
-    // There is no fully supported way to implement this kind
-    // of synchronization in C++11, since the memcpy races with
-    // updates to pi, and the data being accessed is not atomic.
-    // The following fence is unintuitive, but would be the
-    // correct one if memcpy used memory_order_relaxed atomic accesses.
-    // In practice it seems unlikely that the generated code would
-    // would be any different, so this should be OK.
+uint32_t SystemProperties::ReadMutablePropertyValue(const prop_info* pi, char* value) {
+  // We assume the memcpy below gets serialized by the acquire fence.
+  uint32_t new_serial = load_const_atomic(&pi->serial, memory_order_acquire);
+  uint32_t serial;
+  unsigned int len;
+  for (;;) {
+    serial = new_serial;
+    len = SERIAL_VALUE_LEN(serial);
+    if (__predict_false(SERIAL_DIRTY(serial))) {
+      // See the comment in the prop_area constructor.
+      prop_area* pa = contexts_->GetPropAreaForName(pi->name);
+      memcpy(value, pa->dirty_backup_area(), len + 1);
+    } else {
+      memcpy(value, pi->value, len + 1);
+    }
     atomic_thread_fence(memory_order_acquire);
-    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
-      if (name != nullptr) {
-        size_t namelen = strlcpy(name, pi->name, PROP_NAME_MAX);
-        if (namelen >= PROP_NAME_MAX) {
-          async_safe_format_log(ANDROID_LOG_ERROR, "libc",
-                                "The property name length for \"%s\" is >= %d;"
-                                " please use __system_property_read_callback"
-                                " to read this property. (the name is truncated to \"%s\")",
-                                pi->name, PROP_NAME_MAX - 1, name);
-        }
-      }
-      if (is_read_only(pi->name) && pi->is_long()) {
-        async_safe_format_log(
-            ANDROID_LOG_ERROR, "libc",
-            "The property \"%s\" has a value with length %zu that is too large for"
-            " __system_property_get()/__system_property_read(); use"
-            " __system_property_read_callback() instead.",
-            pi->name, strlen(pi->long_value()));
-      }
-      return len;
+    new_serial = load_const_atomic(&pi->serial, memory_order_relaxed);
+    if (__predict_true(serial == new_serial)) {
+      break;
+    }
+    // We need another fence here because we want to ensure that the memcpy in the
+    // next iteration of the loop occurs after the load of new_serial above. We could
+    // get this guarantee by making the load_const_atomic of new_serial
+    // memory_order_acquire instead of memory_order_relaxed, but then we'd pay the
+    // penalty of the memory_order_acquire even in the overwhelmingly common case
+    // that the serial number didn't change.
+    atomic_thread_fence(memory_order_acquire);
+  }
+  return serial;
+}
+
+int SystemProperties::Read(const prop_info* pi, char* name, char* value) {
+  uint32_t serial = ReadMutablePropertyValue(pi, value);
+  if (name != nullptr) {
+    size_t namelen = strlcpy(name, pi->name, PROP_NAME_MAX);
+    if (namelen >= PROP_NAME_MAX) {
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                            "The property name length for \"%s\" is >= %d;"
+                            " please use __system_property_read_callback"
+                            " to read this property. (the name is truncated to \"%s\")",
+                            pi->name, PROP_NAME_MAX - 1, name);
     }
   }
+  if (is_read_only(pi->name) && pi->is_long()) {
+    async_safe_format_log(
+        ANDROID_LOG_ERROR, "libc",
+        "The property \"%s\" has a value with length %zu that is too large for"
+        " __system_property_get()/__system_property_read(); use"
+        " __system_property_read_callback() instead.",
+        pi->name, strlen(pi->long_value()));
+  }
+  return SERIAL_VALUE_LEN(serial);
 }
 
 void SystemProperties::ReadCallback(const prop_info* pi,
@@ -183,9 +199,9 @@
                                                      const char* value, uint32_t serial),
                                     void* cookie) {
   // Read only properties don't need to copy the value to a temporary buffer, since it can never
-  // change.
+  // change.  We use relaxed memory order on the serial load for the same reason.
   if (is_read_only(pi->name)) {
-    uint32_t serial = Serial(pi);
+    uint32_t serial = load_const_atomic(&pi->serial, memory_order_relaxed);
     if (pi->is_long()) {
       callback(cookie, pi->name, pi->long_value(), serial);
     } else {
@@ -194,21 +210,9 @@
     return;
   }
 
-  while (true) {
-    uint32_t serial = Serial(pi);  // acquire semantics
-    size_t len = SERIAL_VALUE_LEN(serial);
-    char value_buf[len + 1];
-
-    memcpy(value_buf, pi->value, len);
-    value_buf[len] = '\0';
-
-    // TODO: see todo in Read function
-    atomic_thread_fence(memory_order_acquire);
-    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
-      callback(cookie, pi->name, value_buf, serial);
-      return;
-    }
-  }
+  char value_buf[PROP_VALUE_MAX];
+  uint32_t serial = ReadMutablePropertyValue(pi, value_buf);
+  callback(cookie, pi->name, value_buf, serial);
 }
 
 int SystemProperties::Get(const char* name, char* value) {
@@ -231,26 +235,37 @@
     return -1;
   }
 
-  prop_area* pa = contexts_->GetSerialPropArea();
-  if (!pa) {
+  prop_area* serial_pa = contexts_->GetSerialPropArea();
+  if (!serial_pa) {
+    return -1;
+  }
+  prop_area* pa = contexts_->GetPropAreaForName(pi->name);
+  if (__predict_false(!pa)) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "Could not find area for \"%s\"", pi->name);
     return -1;
   }
 
   uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
+  unsigned int old_len = SERIAL_VALUE_LEN(serial);
+
+  // The contract with readers is that whenever the dirty bit is set, an undamaged copy
+  // of the pre-dirty value is available in the dirty backup area. The fence ensures
+  // that we publish our dirty area update before allowing readers to see a
+  // dirty serial.
+  memcpy(pa->dirty_backup_area(), pi->value, old_len + 1);
+  atomic_thread_fence(memory_order_release);
   serial |= 1;
   atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
-  // The memcpy call here also races.  Again pretend it
-  // used memory_order_relaxed atomics, and use the analogous
-  // counterintuitive fence.
-  atomic_thread_fence(memory_order_release);
   strlcpy(pi->value, value, len + 1);
-
-  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff), memory_order_release);
-  __futex_wake(&pi->serial, INT32_MAX);
-
-  atomic_store_explicit(pa->serial(), atomic_load_explicit(pa->serial(), memory_order_relaxed) + 1,
+  // Now the primary value property area is up-to-date. Let readers know that they should
+  // look at the property value instead of the backup area.
+  atomic_thread_fence(memory_order_release);
+  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff), memory_order_relaxed);
+  __futex_wake(&pi->serial, INT32_MAX);  // Fence by side effect
+  atomic_store_explicit(serial_pa->serial(),
+                        atomic_load_explicit(serial_pa->serial(), memory_order_relaxed) + 1,
                         memory_order_release);
-  __futex_wake(pa->serial(), INT32_MAX);
+  __futex_wake(serial_pa->serial(), INT32_MAX);
 
   return 0;
 }
@@ -294,16 +309,6 @@
   return 0;
 }
 
-// Wait for non-locked serial, and retrieve it with acquire semantics.
-uint32_t SystemProperties::Serial(const prop_info* pi) {
-  uint32_t serial = load_const_atomic(&pi->serial, memory_order_acquire);
-  while (SERIAL_DIRTY(serial)) {
-    __futex_wait(const_cast<_Atomic(uint_least32_t)*>(&pi->serial), serial, nullptr);
-    serial = load_const_atomic(&pi->serial, memory_order_acquire);
-  }
-  return serial;
-}
-
 uint32_t SystemProperties::WaitAny(uint32_t old_serial) {
   uint32_t new_serial;
   Wait(nullptr, old_serial, &new_serial, nullptr);