Merge "libm: arm: add arm specific sqrt and sqrtf"
diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
index 90aa7b8..e5a170f 100644
--- a/libc/bionic/locale.cpp
+++ b/libc/bionic/locale.cpp
@@ -36,6 +36,7 @@
 #include <wchar.h>
 
 #include "private/bionic_macros.h"
+#include "private/ThreadLocalBuffer.h"
 
 // We currently support a single locale, the "C" locale (also known as "POSIX").
 
@@ -62,10 +63,7 @@
 static lconv g_locale;
 
 // We don't use pthread_once for this so that we know when the resource (a TLS slot) will be taken.
-static pthread_key_t g_uselocale_key;
-__attribute__((constructor)) static void __bionic_tls_uselocale_key_init() {
-  pthread_key_create(&g_uselocale_key, NULL);
-}
+BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(g_uselocale_key, NULL);
 
 static void __locale_init() {
   g_locale.decimal_point = const_cast<char*>(".");
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index 0d63457..83243ab 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -27,6 +27,7 @@
  */
 
 #include <errno.h>
+#include <stdatomic.h>
 
 #include "pthread_internal.h"
 #include "private/bionic_futex.h"
@@ -52,11 +53,6 @@
  *  - This implementation will return EDEADLK in "write after write" and "read after
  *    write" cases and will deadlock in write after read case.
  *
- * TODO: VERY CAREFULLY convert this to use C++11 atomics when possible. All volatile
- * members of pthread_rwlock_t should be converted to atomics<> and __sync_bool_compare_and_swap
- * should be changed to compare_exchange_strong accompanied by the proper ordering
- * constraints (comments have been added with the intending ordering across the code).
- *
  * TODO: As it stands now, pending_readers and pending_writers could be merged into a
  * a single waiters variable.  Keeping them separate adds a bit of clarity and keeps
  * the door open for a writer-biased implementation.
@@ -105,8 +101,40 @@
   return 0;
 }
 
+static inline atomic_int* STATE_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
+    static_assert(sizeof(atomic_int) == sizeof(rwlock->state),
+                  "rwlock->state should actually be atomic_int in implementation.");
+
+    // We prefer casting to atomic_int rather than declaring rwlock->state as atomic_int directly,
+    // because the latter would pollute pthread.h and break the libcxx build.
+    return reinterpret_cast<atomic_int*>(&rwlock->state);
+}
+
+static inline atomic_int* WRITER_THREAD_ID_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
+    static_assert(sizeof(atomic_int) == sizeof(rwlock->writer_thread_id),
+                  "rwlock->writer_thread_id should actually be atomic_int in implementation.");
+
+    return reinterpret_cast<atomic_int*>(&rwlock->writer_thread_id);
+}
+
+static inline atomic_uint* PENDING_READERS_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
+    static_assert(sizeof(atomic_uint) == sizeof(rwlock->pending_readers),
+                  "rwlock->pending_readers should actually be atomic_uint in implementation.");
+
+    return reinterpret_cast<atomic_uint*>(&rwlock->pending_readers);
+}
+
+static inline atomic_uint* PENDING_WRITERS_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
+    static_assert(sizeof(atomic_uint) == sizeof(rwlock->pending_writers),
+                  "rwlock->pending_writers should actually be atomic_uint in implementation.");
+
+    return reinterpret_cast<atomic_uint*>(&rwlock->pending_writers);
+}
+
 int pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t* attr) {
-  if (attr != NULL) {
+  if (__predict_true(attr == NULL)) {
+    rwlock->attr = 0;
+  } else {
     switch (*attr) {
       case PTHREAD_PROCESS_SHARED:
       case PTHREAD_PROCESS_PRIVATE:
@@ -117,10 +145,10 @@
     }
   }
 
-  rwlock->state = 0;
-  rwlock->pending_readers = 0;
-  rwlock->pending_writers = 0;
-  rwlock->writer_thread_id = 0;
+  atomic_init(STATE_ATOMIC_POINTER(rwlock), 0);
+  atomic_init(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), 0);
+  atomic_init(PENDING_READERS_ATOMIC_POINTER(rwlock), 0);
+  atomic_init(PENDING_WRITERS_ATOMIC_POINTER(rwlock), 0);
 
   return 0;
 }
@@ -133,72 +161,87 @@
 }
 
 static int __pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
-  if (__predict_false(__get_thread()->tid == rwlock->writer_thread_id)) {
+  if (__predict_false(__get_thread()->tid ==
+      atomic_load_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), memory_order_relaxed))) {
     return EDEADLK;
   }
 
   timespec ts;
   timespec* rel_timeout = (abs_timeout == NULL) ? NULL : &ts;
-  bool done = false;
-  do {
-    // This is actually a race read as there's nothing that guarantees the atomicity of integer
-    // reads / writes. However, in practice this "never" happens so until we switch to C++11 this
-    // should work fine. The same applies in the other places this idiom is used.
-    int32_t cur_state = rwlock->state;  // C++11 relaxed atomic read
+
+  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
+
+  while (true) {
+    int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
     if (__predict_true(cur_state >= 0)) {
-      // Add as an extra reader.
-      done = __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state + 1);  // C++11 memory_order_aquire
+      if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state + 1,
+                                                memory_order_acquire, memory_order_relaxed)) {
+        return 0;
+      }
     } else {
       if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
         return ETIMEDOUT;
       }
-      // Owner holds it in write mode, hang up.
-      // To avoid losing wake ups the pending_readers update and the state read should be
-      // sequentially consistent. (currently enforced by __sync_fetch_and_add which creates a full barrier)
-      __sync_fetch_and_add(&rwlock->pending_readers, 1);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
-      int ret = __futex_wait_ex(&rwlock->state, rwlock_is_shared(rwlock), cur_state, rel_timeout);
-      __sync_fetch_and_sub(&rwlock->pending_readers, 1);  // C++11 memory_order_relaxed
+      atomic_uint* pending_readers_ptr = PENDING_READERS_ATOMIC_POINTER(rwlock);
+
+      // To avoid losing wake ups, the pending_readers increment must be observed by all
+      // threads before the futex_wait. We use a seq_cst fence rather than a seq_cst
+      // operation here, because only a fence also orders the non-atomic operations
+      // inside futex_wait.
+      atomic_fetch_add_explicit(pending_readers_ptr, 1, memory_order_relaxed);
+      atomic_thread_fence(memory_order_seq_cst);
+      int ret = __futex_wait_ex(state_ptr, rwlock_is_shared(rwlock), cur_state, rel_timeout);
+      atomic_fetch_sub_explicit(pending_readers_ptr, 1, memory_order_relaxed);
       if (ret == -ETIMEDOUT) {
         return ETIMEDOUT;
       }
     }
-  } while (!done);
-
-  return 0;
+  }
 }
 
 static int __pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
-  int tid = __get_thread()->tid;
-  if (__predict_false(tid == rwlock->writer_thread_id)) {
+  if (__predict_false(__get_thread()->tid ==
+      atomic_load_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), memory_order_relaxed))) {
     return EDEADLK;
   }
 
   timespec ts;
   timespec* rel_timeout = (abs_timeout == NULL) ? NULL : &ts;
-  bool done = false;
-  do {
-    int32_t cur_state = rwlock->state;
+
+  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
+
+  while (true) {
+    int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
     if (__predict_true(cur_state == 0)) {
-      // Change state from 0 to -1.
-      done =  __sync_bool_compare_and_swap(&rwlock->state, 0 /* cur state */, -1 /* new state */);  // C++11 memory_order_aquire
+      if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, -1,
+                                                memory_order_acquire, memory_order_relaxed)) {
+        // writer_thread_id is protected by the rwlock and is only modified by the thread
+        // holding the write lock. Other threads only read it for EDEADLK checking, so a
+        // relaxed atomic store is sufficient.
+        atomic_store_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), __get_thread()->tid,
+                              memory_order_relaxed);
+        return 0;
+      }
     } else {
       if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
         return ETIMEDOUT;
       }
-      // Failed to acquire, hang up.
-      // To avoid losing wake ups the pending_writers update and the state read should be
-      // sequentially consistent. (currently enforced by __sync_fetch_and_add which creates a full barrier)
-      __sync_fetch_and_add(&rwlock->pending_writers, 1);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
-      int ret = __futex_wait_ex(&rwlock->state, rwlock_is_shared(rwlock), cur_state, rel_timeout);
-      __sync_fetch_and_sub(&rwlock->pending_writers, 1);  // C++11 memory_order_relaxed
+
+      atomic_uint* pending_writers_ptr = PENDING_WRITERS_ATOMIC_POINTER(rwlock);
+
+      // To avoid losing wake ups, the pending_writers increment must be observed by all
+      // threads before the futex_wait. We use a seq_cst fence rather than a seq_cst
+      // operation here, because only a fence also orders the non-atomic operations
+      // inside futex_wait.
+      atomic_fetch_add_explicit(pending_writers_ptr, 1, memory_order_relaxed);
+      atomic_thread_fence(memory_order_seq_cst);
+      int ret = __futex_wait_ex(state_ptr, rwlock_is_shared(rwlock), cur_state, rel_timeout);
+      atomic_fetch_sub_explicit(pending_writers_ptr, 1, memory_order_relaxed);
       if (ret == -ETIMEDOUT) {
         return ETIMEDOUT;
       }
     }
-  } while (!done);
-
-  rwlock->writer_thread_id = tid;
-  return 0;
+  }
 }
 
 int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
@@ -210,10 +253,14 @@
 }
 
 int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock) {
-  int32_t cur_state = rwlock->state;
-  if ((cur_state >= 0) &&
-      __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state + 1)) {  // C++11 memory_order_acquire
-    return 0;
+  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
+  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
+
+  while (cur_state >= 0) {
+    if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state + 1,
+                                              memory_order_acquire, memory_order_relaxed)) {
+      return 0;
+    }
   }
   return EBUSY;
 }
@@ -227,12 +274,16 @@
 }
 
 int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock) {
-  int tid = __get_thread()->tid;
-  int32_t cur_state = rwlock->state;
-  if ((cur_state == 0) &&
-      __sync_bool_compare_and_swap(&rwlock->state, 0 /* cur state */, -1 /* new state */)) {  // C++11 memory_order_acquire
-    rwlock->writer_thread_id = tid;
-    return 0;
+  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
+  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
+
+  while (cur_state == 0) {
+    if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, -1,
+                                              memory_order_acquire, memory_order_relaxed)) {
+      int tid = __get_thread()->tid;
+      atomic_store_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), tid, memory_order_relaxed);
+      return 0;
+    }
   }
   return EBUSY;
 }
@@ -240,42 +291,53 @@
 
 int pthread_rwlock_unlock(pthread_rwlock_t* rwlock) {
   int tid = __get_thread()->tid;
-  bool done = false;
-  do {
-    int32_t cur_state = rwlock->state;
-    if (cur_state == 0) {
+  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
+  atomic_uint* pending_readers_ptr = PENDING_READERS_ATOMIC_POINTER(rwlock);
+  atomic_uint* pending_writers_ptr = PENDING_WRITERS_ATOMIC_POINTER(rwlock);
+
+  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
+  if (__predict_false(cur_state == 0)) {
+    return EPERM;
+  } else if (cur_state == -1) {
+    atomic_int* writer_thread_id_ptr = WRITER_THREAD_ID_ATOMIC_POINTER(rwlock);
+    if (atomic_load_explicit(writer_thread_id_ptr, memory_order_relaxed) != tid) {
       return EPERM;
     }
-    if (cur_state == -1) {
-      if (rwlock->writer_thread_id != tid) {
+    // We're no longer the owner.
+    atomic_store_explicit(writer_thread_id_ptr, 0, memory_order_relaxed);
+    // Change state from -1 to 0.
+    atomic_store_explicit(state_ptr, 0, memory_order_release);
+    goto wakeup_waiters;
+
+  } else { // cur_state > 0
+    // Reduce state by 1.
+    while (!atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state - 1,
+                                                  memory_order_release, memory_order_relaxed)) {
+      if (cur_state <= 0) {
         return EPERM;
       }
-      // We're no longer the owner.
-      rwlock->writer_thread_id = 0;
-      // Change state from -1 to 0.
-      // We use __sync_bool_compare_and_swap to achieve sequential consistency of the state store and
-      // the following pendingX loads. A simple store with memory_order_release semantics
-      // is not enough to guarantee that the pendingX loads are not reordered before the
-      // store (which may lead to a lost wakeup).
-      __sync_bool_compare_and_swap( &rwlock->state, -1 /* cur state*/, 0 /* new state */);  // C++11 maybe memory_order_seq_cst?
-
-      // Wake any waiters.
-      if (__predict_false(rwlock->pending_readers > 0 || rwlock->pending_writers > 0)) {
-        __futex_wake_ex(&rwlock->state, rwlock_is_shared(rwlock), INT_MAX);
-      }
-      done = true;
-    } else { // cur_state > 0
-      // Reduce state by 1.
-      // See the comment above on why we need __sync_bool_compare_and_swap.
-      done = __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state - 1);  // C++11 maybe memory_order_seq_cst?
-      if (done && (cur_state - 1) == 0) {
-        // There are no more readers, wake any waiters.
-        if (__predict_false(rwlock->pending_readers > 0 || rwlock->pending_writers > 0)) {
-          __futex_wake_ex(&rwlock->state, rwlock_is_shared(rwlock), INT_MAX);
-        }
-      }
     }
-  } while (!done);
+    if (cur_state == 1) {
+      goto wakeup_waiters;
+    }
+  }
+  return 0;
 
+wakeup_waiters:
+  // To avoid losing wake ups, the update of state should be observed before reading
+  // pending_readers/pending_writers by all threads. Use read locking as an example:
+  //     read locking thread                        unlocking thread
+  //      pending_readers++;                         state = 0;
+  //      seq_cst fence                              seq_cst fence
+  //      read state for futex_wait                  read pending_readers for futex_wake
+  //
+  // So when locking and unlocking threads run in parallel, we can never end up with the
+  // locking thread reading the state as negative and deciding to wait while the unlocking
+  // thread reads pending_readers as zero and decides there is nobody to wake.
+  atomic_thread_fence(memory_order_seq_cst);
+  if (__predict_false(atomic_load_explicit(pending_readers_ptr, memory_order_relaxed) > 0 ||
+                      atomic_load_explicit(pending_writers_ptr, memory_order_relaxed) > 0)) {
+    __futex_wake_ex(state_ptr, rwlock_is_shared(rwlock), INT_MAX);
+  }
   return 0;
 }
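
The wakeup_waiters comment above describes the classic "register as a waiter, then re-check" protocol. The following is a minimal standalone sketch of that fence pairing in C11, using hypothetical names (lock_state, waiter_count, try_sleep, must_wake); it illustrates the idea only and is not part of this patch.

#include <stdatomic.h>
#include <stdbool.h>

// Hypothetical stand-ins for rwlock->state and pending_readers/pending_writers.
static atomic_int lock_state;     // -1 = writer lock, 0 = unlocked, >0 = reader count
static atomic_uint waiter_count;  // threads about to call futex_wait

// Locking side: announce the waiter, fence, then re-check the state before sleeping.
static bool try_sleep(int observed_state) {
  atomic_fetch_add_explicit(&waiter_count, 1, memory_order_relaxed);
  atomic_thread_fence(memory_order_seq_cst);
  // Sleep only if the lock is still in the state we observed; otherwise retry the lock.
  return atomic_load_explicit(&lock_state, memory_order_relaxed) == observed_state;
}

// Unlocking side: release the lock, fence, then check whether anyone needs waking.
static bool must_wake(void) {
  atomic_store_explicit(&lock_state, 0, memory_order_release);
  atomic_thread_fence(memory_order_seq_cst);
  return atomic_load_explicit(&waiter_count, memory_order_relaxed) > 0;
}

With both fences in place, at least one of the two threads is guaranteed to observe the other's write, so a waiter can never be left sleeping while the unlocker concludes there is nobody to wake.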
diff --git a/libc/dns/resolv/res_state.c b/libc/dns/resolv/res_state.c
index 7533d19..459f073 100644
--- a/libc/dns/resolv/res_state.c
+++ b/libc/dns/resolv/res_state.c
@@ -39,6 +39,8 @@
 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
 #include <sys/_system_properties.h>
 
+#include "private/ThreadLocalBuffer.h"
+
 /* Set to 1 to enable debug traces */
 #define DEBUG 0
 
@@ -50,8 +52,6 @@
 #  define D(...)  do{}while(0)
 #endif
 
-static pthread_key_t   _res_key;
-
 typedef struct {
     int                  _h_errno;
     // TODO: Have one __res_state per network so we don't have to repopulate frequently.
@@ -105,12 +105,7 @@
     free(rt);
 }
 
-__attribute__((constructor))
-static void
-_res_init_key( void )
-{
-    pthread_key_create( &_res_key, _res_thread_free );
-}
+BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(_res_key, _res_thread_free);
 
 static _res_thread*
 _res_thread_get(void)
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 212551b..1fe61e2 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -91,10 +91,10 @@
   pthread_mutex_t __unused_lock;
   pthread_cond_t __unused_cond;
 #endif
-  volatile int32_t state; // 0=unlock, -1=writer lock, +n=reader lock
-  volatile int32_t writer_thread_id;
-  volatile int32_t pending_readers;
-  volatile int32_t pending_writers;
+  int32_t state; // 0=unlock, -1=writer lock, +n=reader lock
+  int32_t writer_thread_id;
+  uint32_t pending_readers;
+  uint32_t pending_writers;
   int32_t attr;
 #ifdef __LP64__
   char __reserved[36];
diff --git a/libc/private/ThreadLocalBuffer.h b/libc/private/ThreadLocalBuffer.h
index e5bd28c..cc47317 100644
--- a/libc/private/ThreadLocalBuffer.h
+++ b/libc/private/ThreadLocalBuffer.h
@@ -38,15 +38,17 @@
 
 // We used to use pthread_once to initialize the keys, but life is more predictable
 // if we allocate them all up front when the C library starts up, via __constructor__.
+#define BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(key_name, key_destructor) \
+  static pthread_key_t key_name; \
+  __attribute__((constructor)) static void __bionic_tls_ ## key_name ## _key_init() { \
+    pthread_key_create(&key_name, key_destructor); \
+  }
 
 #define GLOBAL_INIT_THREAD_LOCAL_BUFFER(name) \
-  static pthread_key_t __bionic_tls_ ## name ## _key; \
   static void __bionic_tls_ ## name ## _key_destroy(void* buffer) { \
     free(buffer); \
   } \
-  __attribute__((constructor)) static void __bionic_tls_ ## name ## _key_init() { \
-    pthread_key_create(&__bionic_tls_ ## name ## _key, __bionic_tls_ ## name ## _key_destroy); \
-  }
+  BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(__bionic_tls_ ## name ## _key, __bionic_tls_ ## name ## _key_destroy)
 
 // Leaves "name_tls_buffer" and "name_tls_buffer_size" defined and initialized.
 #define LOCAL_INIT_THREAD_LOCAL_BUFFER(type, name, byte_count) \
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 04f5fd2..724f896 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -78,7 +78,7 @@
  * Following are current pthread keys used internally by libc:
  *  basename               libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
  *  dirname                libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  uselocale              libc
+ *  uselocale              libc (BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR)
  *  getmntent_mntent       libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
  *  getmntent_strings      libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
  *  ptsname                libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
@@ -87,7 +87,7 @@
  *  strsignal              libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
  *  passwd                 libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
  *  group                  libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  _res_key               libc
+ *  _res_key               libc (BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR)
  */
 
 #define LIBC_PTHREAD_KEY_RESERVED_COUNT 12
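
For reference, here is what BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR expands to for a hypothetical key named g_example_key with free as its destructor (a sketch of the preprocessor output; the real expansion differs only in whitespace):

static pthread_key_t g_example_key;
__attribute__((constructor)) static void __bionic_tls_g_example_key_key_init() {
  pthread_key_create(&g_example_key, free);
}

This is functionally equivalent to the hand-written constructors removed from locale.cpp and res_state.c above, so converting the remaining callers is mechanical.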
diff --git a/tests/fortify_sprintf_warnings.cpp b/tests/fortify_sprintf_warnings.cpp
index 3a2d3c4..537b341 100644
--- a/tests/fortify_sprintf_warnings.cpp
+++ b/tests/fortify_sprintf_warnings.cpp
@@ -16,7 +16,16 @@
 
 #undef _FORTIFY_SOURCE
 #define _FORTIFY_SOURCE 2
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdarg.h>
 #include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
 
 void test_sprintf() {
   char buf[4];
@@ -24,12 +33,12 @@
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___sprintf_chk(char*, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  sprintf(buf, "foobar"); // NOLINT(runtime/printf)
+  sprintf(buf, "foobar");  // NOLINT(runtime/printf)
 
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___sprintf_chk(char*, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  sprintf(buf, "%s", "foobar"); // NOLINT(runtime/printf)
+  sprintf(buf, "%s", "foobar");  // NOLINT(runtime/printf)
 }
 
 void test_snprintf() {
@@ -38,20 +47,186 @@
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___snprintf_chk(char*, {{(long )?}}unsigned int, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  snprintf(buf, 5, "foobar"); // NOLINT(runtime/printf)
+  snprintf(buf, 5, "foobar");  // NOLINT(runtime/printf)
 
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___snprintf_chk(char*, {{(long )?}}unsigned int, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  snprintf(buf, 5, "%s", "foobar"); // NOLINT(runtime/printf)
+  snprintf(buf, 5, "%s", "foobar");  // NOLINT(runtime/printf)
 
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___snprintf_chk(char*, {{(long )?}}unsigned int, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  snprintf(buf, 5, " %s ", "foobar"); // NOLINT(runtime/printf)
+  snprintf(buf, 5, " %s ", "foobar");  // NOLINT(runtime/printf)
 
   // NOLINTNEXTLINE(whitespace/line_length)
   // GCC: warning: call to int __builtin___snprintf_chk(char*, {{(long )?}}unsigned int, int, {{(long )?}}unsigned int, const char*, ...) will always overflow destination buffer
   // clang should emit a warning, but doesn't
-  snprintf(buf, 5, "%d", 100000); // NOLINT(runtime/printf)
+  snprintf(buf, 5, "%d", 100000);  // NOLINT(runtime/printf)
+}
+
+void test_memcpy() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to void* __builtin___memcpy_chk(void*, const void*, {{(long )?}}unsigned int, {{(long )?}}unsigned int) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  memcpy(buf, "foobar", sizeof("foobar"));
+}
+
+void test_memmove() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to void* __builtin___memmove_chk(void*, const void*, {{(long )?}}unsigned int, {{(long )?}}unsigned int) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  memmove(buf, "foobar", sizeof("foobar"));
+}
+
+void test_memset() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to void* __builtin___memset_chk(void*, int, {{(long )?}}unsigned int, {{(long )?}}unsigned int) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  memset(buf, 0, 6);
+}
+
+void test_strcpy() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to {{(char\* __builtin___strcpy_chk\(char\*, const char\*, unsigned int\))|(void\* __builtin___memcpy_chk\(void\*, const void\*, (long )?unsigned int, (long )?unsigned int\))}} will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  strcpy(buf, "foobar");  // NOLINT(runtime/printf)
+}
+
+void test_stpcpy() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to char* __builtin___stpcpy_chk(char*, const char*, {{(long )?}}unsigned int) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  stpcpy(buf, "foobar");
+}
+
+void test_strncpy() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to char* __builtin___strncpy_chk(char*, const char*, {{(long )?}}unsigned int, {{(long )?}}unsigned int) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  strncpy(buf, "foobar", sizeof("foobar"));
+}
+
+void test_strcat() {
+  char buf[4] = "";
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to {{(char\* __builtin___strcat_chk\(char\*, const char\*, unsigned int\))|(void\* __builtin___memcpy_chk\(void\*, const void\*, (long )?unsigned int, (long )?unsigned int\))}} will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  strcat(buf, "foobar");  // NOLINT(runtime/printf)
+}
+
+void test_strncat() {
+  char buf[4] = "";
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to {{(char\* __builtin___strcat_chk\(char\*, const char\*, unsigned int\))|(void\* __builtin___memcpy_chk\(void\*, const void\*, (long )?unsigned int, (long )?unsigned int\))}} will always overflow destination buffer
+  // GCC reports this warning against __builtin___strcat_chk rather than __builtin___strncat_chk.
+  // clang should emit a warning, but doesn't
+  strncat(buf, "foobar", sizeof("foobar"));
+}
+
+void test_vsprintf(const char* fmt, ...) {
+  va_list va;
+  char buf[4];
+  va_start(va, fmt);
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to int __builtin___vsprintf_chk(char*, int, {{(long )?}}unsigned int, const char*, {{(__va_list)|(void\*)|(char\*)|(__va_list_tag\*)}}) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  vsprintf(buf, "foobar", va);
+  va_end(va);
+}
+
+void test_vsnprintf(const char* fmt, ...) {
+  va_list va;
+  char buf[4];
+  va_start(va, fmt);
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: warning: call to int __builtin___vsnprintf_chk(char*, {{(long )?}}unsigned int, int, {{(long )?}}unsigned int, const char*, {{(__va_list)|(void\*)|(char\*)|(__va_list_tag\*)}}) will always overflow destination buffer
+  // clang should emit a warning, but doesn't
+  vsnprintf(buf, 5, "foobar", va);  // NOLINT(runtime/printf)
+
+  va_end(va);
+}
+
+void test_fgets() {
+  char buf[4];
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__fgets_too_small_error' declared with attribute error: fgets called with size less than zero
+  // clang should emit a warning, but doesn't
+  fgets(buf, -1, stdin);
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__fgets_too_big_error' declared with attribute error: fgets called with size bigger than buffer
+  // clang should emit a warning, but doesn't
+  fgets(buf, 6, stdin);
+}
+
+void test_recvfrom() {
+  char buf[4];
+  sockaddr_in addr;
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__recvfrom_error' declared with attribute error: recvfrom called with size bigger than buffer
+  // clang should emit a warning, but doesn't
+  recvfrom(0, buf, 6, 0, reinterpret_cast<sockaddr*>(&addr), NULL);
+}
+
+void test_umask() {
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__umask_invalid_mode' declared with attribute error: umask called with invalid mode
+  // clang should emit a warning, but doesn't
+  umask(01777);
+}
+
+void test_read() {
+  char buf[4];
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__read_dest_size_error' declared with attribute error: read called with size bigger than destination
+  // clang should emit a warning, but doesn't
+  read(0, buf, 6);
+}
+
+void test_open() {
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__creat_missing_mode' declared with attribute error: called with O_CREAT, but missing mode
+  // clang should emit a warning, but doesn't
+  open("/dev/null", O_CREAT);
+
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__creat_too_many_args' declared with attribute error: too many arguments
+  // clang should emit a warning, but doesn't
+  open("/dev/null", O_CREAT, 0, 0);
+}
+
+void test_poll() {
+  pollfd fds[1];
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__poll_too_small_error' declared with attribute error: poll: pollfd array smaller than fd count
+  // clang should emit a warning, but doesn't
+  poll(fds, 2, 0);
+}
+
+void test_ppoll() {
+  pollfd fds[1];
+  timespec timeout;
+  // NOLINTNEXTLINE(whitespace/line_length)
+  // GCC: error: call to '__ppoll_too_small_error' declared with attribute error: ppoll: pollfd array smaller than fd count
+  // clang should emit a warning, but doesn't
+  ppoll(fds, 2, &timeout, NULL);
 }
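
The "declared with attribute error" diagnostics expected in the tests above come from fortified wrappers that route statically detectable misuse to functions declared with __attribute__((error(...))). A minimal sketch of that mechanism in C, using hypothetical names (example_memcpy, __example_overflow_error) rather than the actual bionic headers:

#include <string.h>

// Any call to this function that survives optimization is a compile-time error; the
// wrapper only references it on paths the compiler can prove will overflow.
extern void __example_overflow_error(void)
    __attribute__((__error__("memcpy called with size bigger than buffer")));

__attribute__((__always_inline__))
static inline void* example_memcpy(void* dst, const void* src, size_t n) {
  size_t dst_size = __builtin_object_size(dst, 0);  // (size_t)-1 if unknown
  if (__builtin_constant_p(n) && n > dst_size) {
    __example_overflow_error();
  }
  return __builtin___memcpy_chk(dst, src, n, dst_size);
}

Because the error call must actually be eliminated by the compiler on safe paths, diagnostics of this kind generally require optimization to be enabled.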
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 5dc60ee..c507faa 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -33,6 +33,8 @@
 #include <time.h>
 #include <unistd.h>
 
+#include <atomic>
+
 TEST(pthread, pthread_key_create) {
   pthread_key_t key;
   ASSERT_EQ(0, pthread_key_create(&key, NULL));
@@ -699,6 +701,79 @@
   ASSERT_EQ(0, pthread_rwlock_destroy(&l));
 }
 
+struct RwlockWakeupHelperArg {
+  pthread_rwlock_t lock;
+  enum Progress {
+    LOCK_INITIALIZED,
+    LOCK_WAITING,
+    LOCK_RELEASED,
+    LOCK_ACCESSED
+  };
+  std::atomic<Progress> progress;
+};
+
+static void pthread_rwlock_reader_wakeup_writer_helper(RwlockWakeupHelperArg* arg) {
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_INITIALIZED, arg->progress);
+  arg->progress = RwlockWakeupHelperArg::LOCK_WAITING;
+
+  ASSERT_EQ(EBUSY, pthread_rwlock_trywrlock(&arg->lock));
+  ASSERT_EQ(0, pthread_rwlock_wrlock(&arg->lock));
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_RELEASED, arg->progress);
+  ASSERT_EQ(0, pthread_rwlock_unlock(&arg->lock));
+
+  arg->progress = RwlockWakeupHelperArg::LOCK_ACCESSED;
+}
+
+TEST(pthread, pthread_rwlock_reader_wakeup_writer) {
+  RwlockWakeupHelperArg wakeup_arg;
+  ASSERT_EQ(0, pthread_rwlock_init(&wakeup_arg.lock, NULL));
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&wakeup_arg.lock));
+  wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_INITIALIZED;
+
+  pthread_t thread;
+  ASSERT_EQ(0, pthread_create(&thread, NULL,
+    reinterpret_cast<void* (*)(void*)>(pthread_rwlock_reader_wakeup_writer_helper), &wakeup_arg));
+  sleep(1);
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+  wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
+  ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
+
+  ASSERT_EQ(0, pthread_join(thread, NULL));
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_ACCESSED, wakeup_arg.progress);
+  ASSERT_EQ(0, pthread_rwlock_destroy(&wakeup_arg.lock));
+}
+
+static void pthread_rwlock_writer_wakeup_reader_helper(RwlockWakeupHelperArg* arg) {
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_INITIALIZED, arg->progress);
+  arg->progress = RwlockWakeupHelperArg::LOCK_WAITING;
+
+  ASSERT_EQ(EBUSY, pthread_rwlock_tryrdlock(&arg->lock));
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&arg->lock));
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_RELEASED, arg->progress);
+  ASSERT_EQ(0, pthread_rwlock_unlock(&arg->lock));
+
+  arg->progress = RwlockWakeupHelperArg::LOCK_ACCESSED;
+}
+
+TEST(pthread, pthread_rwlock_writer_wakeup_reader) {
+  RwlockWakeupHelperArg wakeup_arg;
+  ASSERT_EQ(0, pthread_rwlock_init(&wakeup_arg.lock, NULL));
+  ASSERT_EQ(0, pthread_rwlock_wrlock(&wakeup_arg.lock));
+  wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_INITIALIZED;
+
+  pthread_t thread;
+  ASSERT_EQ(0, pthread_create(&thread, NULL,
+    reinterpret_cast<void* (*)(void*)>(pthread_rwlock_writer_wakeup_reader_helper), &wakeup_arg));
+  sleep(1);
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+  wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
+  ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
+
+  ASSERT_EQ(0, pthread_join(thread, NULL));
+  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_ACCESSED, wakeup_arg.progress);
+  ASSERT_EQ(0, pthread_rwlock_destroy(&wakeup_arg.lock));
+}
+
 static int g_once_fn_call_count = 0;
 static void OnceFn() {
   ++g_once_fn_call_count;