Merge "Let g_thread_list_lock only protect g_thread_list."
diff --git a/libc/bionic/NetdClient.cpp b/libc/bionic/NetdClient.cpp
index 5b0f4fd..b117d72 100644
--- a/libc/bionic/NetdClient.cpp
+++ b/libc/bionic/NetdClient.cpp
@@ -34,7 +34,7 @@
 }
 
 static void netdClientInitImpl() {
-    void* netdClientHandle = dlopen("libnetd_client.so", RTLD_LAZY);
+    void* netdClientHandle = dlopen("libnetd_client.so", RTLD_NOW);
     if (netdClientHandle == NULL) {
         // If the library is not available, it's not an error. We'll just use
         // default implementations of functions that it would've overridden.
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index 1a2765a..ee796c6 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -402,7 +402,7 @@
   }
 
   // Load .so that implements the required malloc debugging functionality.
-  void* malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
+  void* malloc_impl_handle = dlopen(so_name, RTLD_NOW);
   if (malloc_impl_handle == NULL) {
     error_log("%s: Missing module %s required for malloc debug level %d: %s",
               getprogname(), so_name, g_malloc_debug_level, dlerror());
diff --git a/libc/bionic/pthread_cond.cpp b/libc/bionic/pthread_cond.cpp
index 95a433c..4a69da5 100644
--- a/libc/bionic/pthread_cond.cpp
+++ b/libc/bionic/pthread_cond.cpp
@@ -120,9 +120,15 @@
 #endif
 };
 
+static_assert(sizeof(pthread_cond_t) == sizeof(pthread_cond_internal_t),
+              "pthread_cond_t should actually be pthread_cond_internal_t in implementation.");
+
+// For binary compatibility with old versions of pthread_cond_t, we can't require stricter
+// alignment than 4 bytes.
+static_assert(alignof(pthread_cond_t) == 4,
+              "pthread_cond_t should fulfill the alignment requirement of pthread_cond_internal_t.");
+
 static pthread_cond_internal_t* __get_internal_cond(pthread_cond_t* cond_interface) {
-  static_assert(sizeof(pthread_cond_t) == sizeof(pthread_cond_internal_t),
-                "pthread_cond_t should actually be pthread_cond_internal_t in implementation.");
   return reinterpret_cast<pthread_cond_internal_t*>(cond_interface);
 }
 
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index ef3ce05..3d73d52 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -158,15 +158,16 @@
   }
 
   // Mapped space(or user allocated stack) is used for:
-  //   thread_internal_t
+  //   pthread_internal_t
   //   thread stack (including guard page)
-  stack_top -= sizeof(pthread_internal_t);
+
+  // To safely access the pthread_internal_t and the thread stack, round down to a 16-byte aligned boundary.
+  stack_top = reinterpret_cast<uint8_t*>(
+                (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
+
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
-  // No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned,
-  // and user allocated stack is guaranteed by pthread_attr_setstack.
-
   thread->mmap_size = mmap_size;
   thread->attr = *attr;
   __init_tls(thread);
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 99c455e..2151e03 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -103,7 +103,7 @@
    */
 #define __BIONIC_DLERROR_BUFFER_SIZE 512
   char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
-} __attribute__((aligned(16))); // Align it as thread stack top below it should be aligned.
+};
 
 __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index f089940..8aa40ae 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -107,9 +107,15 @@
 #endif
 };
 
+static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
+              "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
+
+// For binary compatibility with old versions of pthread_rwlock_t, we can't require stricter
+// alignment than 4 bytes.
+static_assert(alignof(pthread_rwlock_t) == 4,
+             "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t.");
+
 static inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
-  static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
-                "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
   return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface);
 }
 
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 09ea113..234a43d 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -78,7 +78,7 @@
 #else
   char __private[4];
 #endif
-} pthread_cond_t __attribute__((aligned(sizeof(long))));
+} pthread_cond_t __attribute__((aligned(4)));
 
 #define PTHREAD_COND_INITIALIZER  { { 0 } }
 
@@ -93,7 +93,7 @@
 #else
   char __private[40];
 #endif
-} pthread_rwlock_t __attribute__((aligned(8)));
+} pthread_rwlock_t __attribute__((aligned(4)));
 
 #define PTHREAD_RWLOCK_INITIALIZER  { { 0 } }
 
diff --git a/linker/linker.h b/linker/linker.h
index 04acda4..05735f6 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -149,13 +149,13 @@
   ElfW(Addr) base;
   size_t size;
 
-#ifndef __LP64__
+#if defined(__arm__)
   uint32_t unused1;  // DO NOT USE, maintained for compatibility.
 #endif
 
   ElfW(Dyn)* dynamic;
 
-#ifndef __LP64__
+#if defined(__arm__)
   uint32_t unused2; // DO NOT USE, maintained for compatibility
   uint32_t unused3; // DO NOT USE, maintained for compatibility
 #endif
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 251a230..4eb352d 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -33,6 +33,7 @@
 #include <unistd.h>
 
 #include <atomic>
+#include <vector>
 
 TEST(pthread, pthread_key_create) {
   pthread_key_t key;
@@ -1303,3 +1304,60 @@
   // Change the implementation if we need to support higher value than 65535.
   ASSERT_LE(pid_max, 65536);
 }
+
+class StrictAlignmentAllocator {  // Test helper: buffers aligned to |alignment| but not to 2 * |alignment|.
+ public:
+  void* allocate(size_t size, size_t alignment) {  // Returns a pointer with at least |size| usable bytes.
+    char* p = new char[size + alignment * 2];  // Over-allocate to leave room for the alignment search.
+    allocated_array.push_back(p);  // Remember the start address so the destructor can delete[] it.
+    while (!is_strict_aligned(p, alignment)) {  // Advances at most 2 * alignment - 1 bytes.
+      ++p;
+    }
+    return p;
+  }
+
+  ~StrictAlignmentAllocator() {  // Frees every buffer handed out by allocate().
+    for (auto& p : allocated_array) {
+      delete [] p;
+    }
+  }
+
+ private:
+  bool is_strict_aligned(char* p, size_t alignment) {  // True iff p is an odd multiple of |alignment|.
+    return (reinterpret_cast<uintptr_t>(p) % (alignment * 2)) == alignment;
+  }
+
+  std::vector<char*> allocated_array;  // Start addresses of all buffers obtained from new[].
+};
+
+TEST(pthread, pthread_types_allow_four_bytes_alignment) {
+#if defined(__BIONIC__)
+  // For binary compatibility with old versions, pthread types must work when only 4-byte aligned.
+  StrictAlignmentAllocator allocator;  // Hands out memory that is 4-byte but not 8-byte aligned.
+  pthread_mutex_t* mutex = reinterpret_cast<pthread_mutex_t*>(
+                             allocator.allocate(sizeof(pthread_mutex_t), 4));
+  ASSERT_EQ(0, pthread_mutex_init(mutex, NULL));
+  ASSERT_EQ(0, pthread_mutex_lock(mutex));
+  ASSERT_EQ(0, pthread_mutex_unlock(mutex));
+  ASSERT_EQ(0, pthread_mutex_destroy(mutex));
+
+  pthread_cond_t* cond = reinterpret_cast<pthread_cond_t*>(
+                           allocator.allocate(sizeof(pthread_cond_t), 4));
+  ASSERT_EQ(0, pthread_cond_init(cond, NULL));
+  ASSERT_EQ(0, pthread_cond_signal(cond));
+  ASSERT_EQ(0, pthread_cond_broadcast(cond));
+  ASSERT_EQ(0, pthread_cond_destroy(cond));
+
+  pthread_rwlock_t* rwlock = reinterpret_cast<pthread_rwlock_t*>(
+                               allocator.allocate(sizeof(pthread_rwlock_t), 4));
+  ASSERT_EQ(0, pthread_rwlock_init(rwlock, NULL));
+  ASSERT_EQ(0, pthread_rwlock_rdlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_unlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_wrlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_unlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_destroy(rwlock));
+
+#else
+  GTEST_LOG_(INFO) << "This test tests bionic implementation details.";
+#endif
+}
diff --git a/tools/bionicbb/gmail_listener.py b/tools/bionicbb/gmail_listener.py
index 0cd31c9..770f0c4 100644
--- a/tools/bionicbb/gmail_listener.py
+++ b/tools/bionicbb/gmail_listener.py
@@ -190,8 +190,10 @@
         if lunch_target is not None:
             params['LUNCH_TARGET'] = lunch_target
         if not dry_run:
-            job = jenkins[build].invoke(build_params=params)
-            url = job.get_build().baseurl
+            _ = jenkins[build].invoke(build_params=params)
+            # https://issues.jenkins-ci.org/browse/JENKINS-27256
+            # url = job.get_build().baseurl
+            url = 'URL UNAVAILABLE'
         else:
             url = 'DRY_RUN_URL'
         print '{}({}): {} => {} {} {}'.format(