Revert "Revert "Make system property reads wait-free""

This reverts commit de39d9242ae38733c33173957386db0b8060e52a.

Reason for revert: This revert is not needed

Change-Id: I34af8e5d75c724f6c4066fafbfc6bc7d58377601
diff --git a/benchmarks/property_benchmark.cpp b/benchmarks/property_benchmark.cpp
index 77814bf..ba54ed1 100644
--- a/benchmarks/property_benchmark.cpp
+++ b/benchmarks/property_benchmark.cpp
@@ -184,7 +184,7 @@
 
   size_t i = 0;
   while (state.KeepRunning()) {
-    pa.system_properties().Serial(pinfo[i]);
+    __system_property_serial(pinfo[i]);
     i = (i + 1) % nprops;
   }
 
diff --git a/libc/bionic/system_property_api.cpp b/libc/bionic/system_property_api.cpp
index 051bc4c..a641f12 100644
--- a/libc/bionic/system_property_api.cpp
+++ b/libc/bionic/system_property_api.cpp
@@ -100,7 +100,13 @@
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 uint32_t __system_property_serial(const prop_info* pi) {
-  return system_properties.Serial(pi);
+  // N.B. a previous version of this function was much heavier-weight
+  // and enforced acquire semantics, so give our load here acquire
+  // semantics just in case somebody depends on
+  // __system_property_serial enforcing memory order, e.g., in case
+  // someone spins on the result of this function changing before
+  // loading some value.
+  return atomic_load_explicit(&pi->serial, memory_order_acquire);
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
diff --git a/libc/system_properties/include/system_properties/prop_area.h b/libc/system_properties/include/system_properties/prop_area.h
index a69f90e..53b2745 100644
--- a/libc/system_properties/include/system_properties/prop_area.h
+++ b/libc/system_properties/include/system_properties/prop_area.h
@@ -106,6 +106,20 @@
     memset(reserved_, 0, sizeof(reserved_));
     // Allocate enough space for the root node.
     bytes_used_ = sizeof(prop_bt);
+    // To make property reads wait-free, we reserve a
+    // PROP_VALUE_MAX-sized block of memory, the "dirty backup area",
+    // just after the root node. When we're about to modify a
+    // property, we copy the old value into the dirty backup area and
+    // copy the new value into the prop_info structure. Before
+    // starting the latter copy, we mark the property's serial as
+    // being dirty. If a reader comes along while we're doing the
+    // property update and sees a dirty serial, the reader copies from
+    // the dirty backup area instead of the property value
+    // proper. After the copy, the reader checks whether the property
+    // serial is the same: if it is, the dirty backup area hasn't been
+    // reused for something else and we can complete the
+    // read immediately.
+    bytes_used_ +=  __BIONIC_ALIGN(PROP_VALUE_MAX, sizeof(uint_least32_t));
   }
 
   const prop_info* find(const char* name);
@@ -122,6 +136,9 @@
   uint32_t version() const {
     return version_;
   }
+  char* dirty_backup_area() {
+    return data_ + sizeof (prop_bt);
+  }
 
  private:
   static prop_area* map_fd_ro(const int fd);
diff --git a/libc/system_properties/include/system_properties/system_properties.h b/libc/system_properties/include/system_properties/system_properties.h
index cad29cc..0666e28 100644
--- a/libc/system_properties/include/system_properties/system_properties.h
+++ b/libc/system_properties/include/system_properties/system_properties.h
@@ -66,7 +66,6 @@
   int Get(const char* name, char* value);
   int Update(prop_info* pi, const char* value, unsigned int len);
   int Add(const char* name, unsigned int namelen, const char* value, unsigned int valuelen);
-  uint32_t Serial(const prop_info* pi);
   uint32_t WaitAny(uint32_t old_serial);
   bool Wait(const prop_info* pi, uint32_t old_serial, uint32_t* new_serial_ptr,
             const timespec* relative_timeout);
@@ -74,6 +73,8 @@
   int Foreach(void (*propfn)(const prop_info* pi, void* cookie), void* cookie);
 
  private:
+  uint32_t ReadMutablePropertyValue(const prop_info* pi, char* value);
+
   // We don't want to use new or malloc in properties (b/31659220), and we don't want to waste a
   // full page by using mmap(), so we set aside enough space to create any context of the three
   // contexts.
diff --git a/libc/system_properties/system_properties.cpp b/libc/system_properties/system_properties.cpp
index d7c441b..8404778 100644
--- a/libc/system_properties/system_properties.cpp
+++ b/libc/system_properties/system_properties.cpp
@@ -140,42 +140,58 @@
   return strncmp(name, "ro.", 3) == 0;
 }
 
-int SystemProperties::Read(const prop_info* pi, char* name, char* value) {
-  while (true) {
-    uint32_t serial = Serial(pi);  // acquire semantics
-    size_t len = SERIAL_VALUE_LEN(serial);
-    memcpy(value, pi->value, len + 1);
-    // TODO: Fix the synchronization scheme here.
-    // There is no fully supported way to implement this kind
-    // of synchronization in C++11, since the memcpy races with
-    // updates to pi, and the data being accessed is not atomic.
-    // The following fence is unintuitive, but would be the
-    // correct one if memcpy used memory_order_relaxed atomic accesses.
-    // In practice it seems unlikely that the generated code would
-    // would be any different, so this should be OK.
+uint32_t SystemProperties::ReadMutablePropertyValue(const prop_info* pi, char* value) {
+  // We assume the memcpy below gets serialized by the acquire fence.
+  uint32_t new_serial = load_const_atomic(&pi->serial, memory_order_acquire);
+  uint32_t serial;
+  unsigned int len;
+  for (;;) {
+    serial = new_serial;
+    len = SERIAL_VALUE_LEN(serial);
+    if (__predict_false(SERIAL_DIRTY(serial))) {
+      // See the comment in the prop_area constructor.
+      prop_area* pa = contexts_->GetPropAreaForName(pi->name);
+      memcpy(value, pa->dirty_backup_area(), len + 1);
+    } else {
+      memcpy(value, pi->value, len + 1);
+    }
     atomic_thread_fence(memory_order_acquire);
-    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
-      if (name != nullptr) {
-        size_t namelen = strlcpy(name, pi->name, PROP_NAME_MAX);
-        if (namelen >= PROP_NAME_MAX) {
-          async_safe_format_log(ANDROID_LOG_ERROR, "libc",
-                                "The property name length for \"%s\" is >= %d;"
-                                " please use __system_property_read_callback"
-                                " to read this property. (the name is truncated to \"%s\")",
-                                pi->name, PROP_NAME_MAX - 1, name);
-        }
-      }
-      if (is_read_only(pi->name) && pi->is_long()) {
-        async_safe_format_log(
-            ANDROID_LOG_ERROR, "libc",
-            "The property \"%s\" has a value with length %zu that is too large for"
-            " __system_property_get()/__system_property_read(); use"
-            " __system_property_read_callback() instead.",
-            pi->name, strlen(pi->long_value()));
-      }
-      return len;
+    new_serial = load_const_atomic(&pi->serial, memory_order_relaxed);
+    if (__predict_true(serial == new_serial)) {
+      break;
+    }
+    // We need another fence here because we want to ensure that the memcpy in the
+    // next iteration of the loop occurs after the load of new_serial above. We could
+    // get this guarantee by making the load_const_atomic of new_serial
+    // memory_order_acquire instead of memory_order_relaxed, but then we'd pay the
+    // penalty of the memory_order_acquire even in the overwhelmingly common case
+    // that the serial number didn't change.
+    atomic_thread_fence(memory_order_acquire);
+  }
+  return serial;
+}
+
+int SystemProperties::Read(const prop_info* pi, char* name, char* value) {
+  uint32_t serial = ReadMutablePropertyValue(pi, value);
+  if (name != nullptr) {
+    size_t namelen = strlcpy(name, pi->name, PROP_NAME_MAX);
+    if (namelen >= PROP_NAME_MAX) {
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                            "The property name length for \"%s\" is >= %d;"
+                            " please use __system_property_read_callback"
+                            " to read this property. (the name is truncated to \"%s\")",
+                            pi->name, PROP_NAME_MAX - 1, name);
     }
   }
+  if (is_read_only(pi->name) && pi->is_long()) {
+    async_safe_format_log(
+        ANDROID_LOG_ERROR, "libc",
+        "The property \"%s\" has a value with length %zu that is too large for"
+        " __system_property_get()/__system_property_read(); use"
+        " __system_property_read_callback() instead.",
+        pi->name, strlen(pi->long_value()));
+  }
+  return SERIAL_VALUE_LEN(serial);
 }
 
 void SystemProperties::ReadCallback(const prop_info* pi,
@@ -183,9 +199,9 @@
                                                      const char* value, uint32_t serial),
                                     void* cookie) {
   // Read only properties don't need to copy the value to a temporary buffer, since it can never
-  // change.
+  // change.  We use relaxed memory order on the serial load for the same reason.
   if (is_read_only(pi->name)) {
-    uint32_t serial = Serial(pi);
+    uint32_t serial = load_const_atomic(&pi->serial, memory_order_relaxed);
     if (pi->is_long()) {
       callback(cookie, pi->name, pi->long_value(), serial);
     } else {
@@ -194,21 +210,9 @@
     return;
   }
 
-  while (true) {
-    uint32_t serial = Serial(pi);  // acquire semantics
-    size_t len = SERIAL_VALUE_LEN(serial);
-    char value_buf[len + 1];
-
-    memcpy(value_buf, pi->value, len);
-    value_buf[len] = '\0';
-
-    // TODO: see todo in Read function
-    atomic_thread_fence(memory_order_acquire);
-    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
-      callback(cookie, pi->name, value_buf, serial);
-      return;
-    }
-  }
+  char value_buf[PROP_VALUE_MAX];
+  uint32_t serial = ReadMutablePropertyValue(pi, value_buf);
+  callback(cookie, pi->name, value_buf, serial);
 }
 
 int SystemProperties::Get(const char* name, char* value) {
@@ -231,26 +235,37 @@
     return -1;
   }
 
-  prop_area* pa = contexts_->GetSerialPropArea();
-  if (!pa) {
+  prop_area* serial_pa = contexts_->GetSerialPropArea();
+  if (!serial_pa) {
+    return -1;
+  }
+  prop_area* pa = contexts_->GetPropAreaForName(pi->name);
+  if (__predict_false(!pa)) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "Could not find area for \"%s\"", pi->name);
     return -1;
   }
 
   uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
+  unsigned int old_len = SERIAL_VALUE_LEN(serial);
+
+  // The contract with readers is that whenever the dirty bit is set, an undamaged copy
+  // of the pre-dirty value is available in the dirty backup area. The fence ensures
+  // that we publish our dirty area update before allowing readers to see a
+  // dirty serial.
+  memcpy(pa->dirty_backup_area(), pi->value, old_len + 1);
+  atomic_thread_fence(memory_order_release);
   serial |= 1;
   atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
-  // The memcpy call here also races.  Again pretend it
-  // used memory_order_relaxed atomics, and use the analogous
-  // counterintuitive fence.
-  atomic_thread_fence(memory_order_release);
   strlcpy(pi->value, value, len + 1);
-
-  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff), memory_order_release);
-  __futex_wake(&pi->serial, INT32_MAX);
-
-  atomic_store_explicit(pa->serial(), atomic_load_explicit(pa->serial(), memory_order_relaxed) + 1,
+  // Now the primary value property area is up-to-date. Let readers know that they should
+  // look at the property value instead of the backup area.
+  atomic_thread_fence(memory_order_release);
+  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff), memory_order_relaxed);
+  __futex_wake(&pi->serial, INT32_MAX);  // Fence by side effect
+  atomic_store_explicit(serial_pa->serial(),
+                        atomic_load_explicit(serial_pa->serial(), memory_order_relaxed) + 1,
                         memory_order_release);
-  __futex_wake(pa->serial(), INT32_MAX);
+  __futex_wake(serial_pa->serial(), INT32_MAX);
 
   return 0;
 }
@@ -294,16 +309,6 @@
   return 0;
 }
 
-// Wait for non-locked serial, and retrieve it with acquire semantics.
-uint32_t SystemProperties::Serial(const prop_info* pi) {
-  uint32_t serial = load_const_atomic(&pi->serial, memory_order_acquire);
-  while (SERIAL_DIRTY(serial)) {
-    __futex_wait(const_cast<_Atomic(uint_least32_t)*>(&pi->serial), serial, nullptr);
-    serial = load_const_atomic(&pi->serial, memory_order_acquire);
-  }
-  return serial;
-}
-
 uint32_t SystemProperties::WaitAny(uint32_t old_serial) {
   uint32_t new_serial;
   Wait(nullptr, old_serial, &new_serial, nullptr);
diff --git a/tests/system_properties_test.cpp b/tests/system_properties_test.cpp
index 245e42f..6d696c7 100644
--- a/tests/system_properties_test.cpp
+++ b/tests/system_properties_test.cpp
@@ -340,9 +340,9 @@
     ASSERT_EQ(0, system_properties.Add("property", 8, "value1", 6));
     const prop_info* pi = system_properties.Find("property");
     ASSERT_TRUE(pi != nullptr);
-    unsigned serial = system_properties.Serial(pi);
+    unsigned serial = __system_property_serial(pi);
     ASSERT_EQ(0, system_properties.Update(const_cast<prop_info*>(pi), "value2", 6));
-    ASSERT_NE(serial, system_properties.Serial(pi));
+    ASSERT_NE(serial, __system_property_serial(pi));
 #else // __BIONIC__
     GTEST_SKIP() << "bionic-only test";
 #endif // __BIONIC__
@@ -389,7 +389,7 @@
     prop_info* pi = const_cast<prop_info*>(system_properties.Find("property"));
     ASSERT_TRUE(pi != nullptr);
 
-    unsigned serial = system_properties.Serial(pi);
+    unsigned serial = __system_property_serial(pi);
 
     std::thread thread([&system_properties]() {
         prop_info* pi = const_cast<prop_info*>(system_properties.Find("property"));