resolved conflicts for merge of 7b6e6fa5 to kraken

Change-Id: I2b9b80a7fa32c56be2b85ff8be0d6e7ac1848afe
diff --git a/libc/Android.mk b/libc/Android.mk
index 590afbe..831352b 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -322,6 +322,7 @@
 # up any thumb code.
 libc_common_src_files += \
 	bionic/pthread.c.arm \
+	bionic/pthread-rwlocks.c.arm \
 	bionic/pthread-timers.c.arm \
 	bionic/ptrace.c.arm
 
@@ -356,6 +357,7 @@
 	arch-x86/string/strncmp_wrapper.S \
 	arch-x86/string/strlen.S \
 	bionic/pthread.c \
+	bionic/pthread-rwlocks.c \
 	bionic/pthread-timers.c \
 	bionic/ptrace.c
 
@@ -393,6 +395,7 @@
 	string/strlen.c \
 	bionic/eabi.c \
 	bionic/pthread.c \
+	bionic/pthread-rwlocks.c \
 	bionic/pthread-timers.c \
 	bionic/ptrace.c \
 	unistd/socketcalls.c
@@ -452,6 +455,14 @@
   endif # x86
 endif # !arm
 
+# Define ANDROID_SMP appropriately.
+ifeq ($(TARGET_CPU_SMP),true)
+    libc_common_cflags += -DANDROID_SMP=1
+else
+    libc_common_cflags += -DANDROID_SMP=0
+endif
+
+
 # Define some common includes
 # ========================================================
 libc_common_c_includes := \
diff --git a/libc/arch-arm/bionic/atomics_arm.S b/libc/arch-arm/bionic/atomics_arm.S
index 047541f..d94f6b1 100644
--- a/libc/arch-arm/bionic/atomics_arm.S
+++ b/libc/arch-arm/bionic/atomics_arm.S
@@ -26,6 +26,7 @@
  * SUCH DAMAGE.
  */
 #include <sys/linux-syscalls.h>
+#include <machine/cpu-features.h>
 
 .global __atomic_cmpxchg
 .type __atomic_cmpxchg, %function
@@ -39,9 +40,73 @@
 #define FUTEX_WAIT 0
 #define FUTEX_WAKE 1
 
-#if 1
-   .equ     kernel_cmpxchg, 0xFFFF0FC0
-   .equ     kernel_atomic_base, 0xFFFF0FFF
+#if defined(__ARM_HAVE_LDREX_STREX)
+/*
+ * ===========================================================================
+ *      ARMv6+ implementation
+ * ===========================================================================
+ */
+
+/* r0(addr) -> r0(old) */
+__atomic_dec:
+    .fnstart
+    mov     r1, r0                      @ copy addr so we don't clobber it
+1:  ldrex   r0, [r1]                    @ load current value into r0
+    sub     r2, r0, #1                  @ generate new value into r2
+    strex   r3, r2, [r1]                @ try to store new value; result in r3
+    cmp     r3, #0                      @ success?
+    bxeq    lr                          @ yes, return
+    b       1b                          @ no, retry
+    .fnend
+
+/* r0(addr) -> r0(old) */
+__atomic_inc:
+    .fnstart
+    mov     r1, r0
+1:  ldrex   r0, [r1]
+    add     r2, r0, #1
+    strex   r3, r2, [r1]
+    cmp     r3, #0
+    bxeq    lr
+    b       1b
+    .fnend
+
+/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
+__atomic_cmpxchg:
+    .fnstart
+1:  mov     ip, #2                      @ ip=2 means "*addr != old"
+    ldrex   r3, [r2]                    @ load current value into r3
+    teq     r0, r3                      @ current value == old?
+    strexeq ip, r1, [r2]                @ yes, try store, set ip to 0 or 1
+    teq     ip, #1                      @ strex failure?
+    beq     1b                          @ yes, retry
+    mov     r0, ip                      @ return 0 on success, 2 if *addr != old
+    bx      lr
+    .fnend
+
+/* r0(new) r1(addr) -> r0(old) */
+__atomic_swap:
+    .fnstart
+1:  ldrex   r2, [r1]
+    strex   r3, r0, [r1]
+    teq     r3, #0
+    bne     1b
+    mov     r0, r2
+    bx      lr
+    .fnend
+
+#else /*not defined __ARM_HAVE_LDREX_STREX*/
+/*
+ * ===========================================================================
+ *      Pre-ARMv6 implementation
+ * ===========================================================================
+ */
+
+    /* int __kernel_cmpxchg(int oldval, int newval, int* ptr) */
+    .equ    kernel_cmpxchg, 0xFFFF0FC0
+    .equ    kernel_atomic_base, 0xFFFF0FFF
+
+/* r0(addr) -> r0(old) */
 __atomic_dec:
     .fnstart
     .save {r4, lr}
@@ -59,6 +124,7 @@
     bx      lr
     .fnend
 
+/* r0(addr) -> r0(old) */
 __atomic_inc:
     .fnstart
     .save {r4, lr}
@@ -95,64 +161,16 @@
     ldmia   sp!, {r4, lr}
     bx      lr
     .fnend
-#else
-#define KUSER_CMPXCHG 0xffffffc0
-
-/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-__atomic_cmpxchg:
-    stmdb   sp!, {r4, lr}
-    mov     r4, r0          /* r4 = save oldvalue */
-1:  add     lr, pc, #4
-    mov     r0, r4          /* r0 = oldvalue */
-    mov     pc, #KUSER_CMPXCHG
-    bcs     2f              /* swap was made. we're good, return. */
-    ldr     r3, [r2]        /* swap not made, see if it's because *ptr!=oldvalue */
-    cmp     r3, r4
-    beq     1b
-2:  ldmia   sp!, {r4, lr}
-    bx      lr
-
-/* r0(addr) -> r0(old) */
-__atomic_dec:
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0          /* address */
-1:  ldr     r0, [r2]        /* oldvalue */
-    add     lr, pc, #4
-    sub     r1, r0, #1      /* newvalue = oldvalue - 1 */
-    mov     pc, #KUSER_CMPXCHG
-    bcc     1b              /* no swap, try again until we get it right */
-    mov     r0, ip          /* swapped, return the old value */
-    ldmia   sp!, {r4, lr}
-    bx      lr
-
-/* r0(addr) -> r0(old) */
-__atomic_inc:
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0          /* address */
-1:  ldr     r0, [r2]        /* oldvalue */
-    add     lr, pc, #4
-    add     r1, r0, #1      /* newvalue = oldvalue + 1 */
-    mov     pc, #KUSER_CMPXCHG
-    bcc     1b              /* no swap, try again until we get it right */
-    mov     r0, ip          /* swapped, return the old value */
-    ldmia   sp!, {r4, lr}
-    bx      lr
-#endif
 
 /* r0(new) r1(addr) -> r0(old) */
-/* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */
 __atomic_swap:
-#if defined (_ARM_HAVE_LDREX_STREX)
-1:  ldrex   r2, [r1]
-    strex   r3, r0, [r1]
-    teq     r3, #0
-    bne     1b
-    mov     r0, r2
-    mcr     p15, 0, r0, c7, c10, 5 /* or, use dmb */
-#else
+    .fnstart
     swp     r0, r0, [r1]
-#endif
     bx      lr
+    .fnend
+
+#endif /*not defined __ARM_HAVE_LDREX_STREX*/
+
 
 /* __futex_wait(*ftx, val, *timespec) */
 /* __futex_wake(*ftx, counter) */
@@ -197,6 +215,8 @@
     .fnend
 
 __futex_wake:
+    .fnstart
+    .save   {r4, r7}
     stmdb   sp!, {r4, r7}
     mov     r2, r1
     mov     r1, #FUTEX_WAKE
@@ -204,6 +224,7 @@
     swi     #0
     ldmia   sp!, {r4, r7}
     bx      lr
+    .fnend
 
 #else
 
diff --git a/libc/arch-arm/include/machine/cpu-features.h b/libc/arch-arm/include/machine/cpu-features.h
index ecf6ff6..0f969fa 100644
--- a/libc/arch-arm/include/machine/cpu-features.h
+++ b/libc/arch-arm/include/machine/cpu-features.h
@@ -38,7 +38,7 @@
  * IMPORTANT: We have no intention to support anything below an ARMv4T !
  */
 
-/* _ARM_ARCH_REVISION is a number corresponding to the ARM revision
+/* __ARM_ARCH__ is a number corresponding to the ARM revision
  * we're going to support
  *
  * it looks like our toolchain doesn't define __ARM_ARCH__
@@ -142,18 +142,32 @@
  *
  *     ldr  pc, [<some address>]
  *
- * note that this affects any instruction that explicitely changes the
+ * note that this affects any instruction that explicitly changes the
  * value of the pc register, including ldm { ...,pc } or 'add pc, #offset'
  */
 #if __ARM_ARCH__ >= 5
 #  define __ARM_HAVE_PC_INTERWORK
 #endif
 
-/* define _ARM_HAVE_LDREX_STREX for ARMv6 and ARMv7 architecure to be
- * used in replacement of depricated swp instruction
+/* define __ARM_HAVE_LDREX_STREX for ARMv6 and ARMv7 architecture to be
+ * used in replacement of deprecated swp instruction
  */
 #if __ARM_ARCH__ >= 6
-#  define _ARM_HAVE_LDREX_STREX
+#  define __ARM_HAVE_LDREX_STREX
+#endif
+
+/* define __ARM_HAVE_DMB for ARMv7 architecture
+ */
+#if __ARM_ARCH__ >= 7
+#  define __ARM_HAVE_DMB
+#endif
+
+/* define __ARM_HAVE_LDREXD for ARMv7 architecture
+ * (also present in ARMv6K, but not implemented in ARMv7-M, neither of which
+ * we care about)
+ */
+#if __ARM_ARCH__ >= 7
+#  define __ARM_HAVE_LDREXD
 #endif
 
 
diff --git a/libc/bionic/pthread-rwlocks.c b/libc/bionic/pthread-rwlocks.c
new file mode 100644
index 0000000..ca3e95c
--- /dev/null
+++ b/libc/bionic/pthread-rwlocks.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "pthread_internal.h"
+#include <errno.h>
+
+/* Technical note:
+ *
+ * Possible states of a read/write lock:
+ *
+ *  - no readers and no writer (unlocked)
+ *  - one or more readers sharing the lock at the same time (read-locked)
+ *  - one writer holding the lock (write-locked)
+ *
+ * Additionally:
+ *  - trying to get the write-lock while there are any readers blocks
+ *  - trying to get the read-lock while there is a writer blocks
+ *  - a single thread can acquire the lock multiple times in the same mode
+ *
+ *  - Posix states that behaviour is undefined if a thread tries to acquire
+ *    the lock in two distinct modes (e.g. write after read, or read after write).
+ *
+ *  - This implementation tries to avoid writer starvation by making the readers
+ *    block as soon as there is a waiting writer on the lock. However, it cannot
+ *    completely eliminate it: each time the lock is unlocked, all waiting threads
+ *    are woken and battle for it; which one gets it depends on the kernel scheduler
+ *    and is semi-random.
+ *
+ */
+
+#define  __likely(cond)    __builtin_expect(!!(cond), 1)
+#define  __unlikely(cond)  __builtin_expect(!!(cond), 0)
+
+#define  RWLOCKATTR_DEFAULT     0
+#define  RWLOCKATTR_SHARED_MASK 0x0010
+
+extern pthread_internal_t* __get_thread(void);
+
+/* Return a global kernel ID for the current thread */
+static int __get_thread_id(void)
+{
+    return __get_thread()->kernel_id;
+}
+
+int pthread_rwlockattr_init(pthread_rwlockattr_t *attr)
+{
+    if (!attr)
+        return EINVAL;
+
+    *attr = PTHREAD_PROCESS_PRIVATE;
+    return 0;
+}
+
+int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr)
+{
+    if (!attr)
+        return EINVAL;
+
+    *attr = -1;
+    return 0;
+}
+
+int pthread_rwlockattr_setpshared(pthread_rwlockattr_t *attr, int  pshared)
+{
+    if (!attr)
+        return EINVAL;
+
+    switch (pshared) {
+    case PTHREAD_PROCESS_PRIVATE:
+    case PTHREAD_PROCESS_SHARED:
+        *attr = pshared;
+        return 0;
+    default:
+        return EINVAL;
+    }
+}
+
+int pthread_rwlockattr_getpshared(pthread_rwlockattr_t *attr, int *pshared)
+{
+    if (!attr || !pshared)
+        return EINVAL;
+
+    *pshared = *attr;
+    return 0;
+}
+
+int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
+{
+    pthread_mutexattr_t*  lock_attr = NULL;
+    pthread_condattr_t*   cond_attr = NULL;
+    pthread_mutexattr_t   lock_attr0;
+    pthread_condattr_t    cond_attr0;
+    int                   ret;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    if (attr && *attr == PTHREAD_PROCESS_SHARED) {
+        lock_attr = &lock_attr0;
+        pthread_mutexattr_init(lock_attr);
+        pthread_mutexattr_setpshared(lock_attr, PTHREAD_PROCESS_SHARED);
+
+        cond_attr = &cond_attr0;
+        pthread_condattr_init(cond_attr);
+        pthread_condattr_setpshared(cond_attr, PTHREAD_PROCESS_SHARED);
+    }
+
+    ret = pthread_mutex_init(&rwlock->lock, lock_attr);
+    if (ret != 0)
+        return ret;
+
+    ret = pthread_cond_init(&rwlock->cond, cond_attr);
+    if (ret != 0) {
+        pthread_mutex_destroy(&rwlock->lock);
+        return ret;
+    }
+
+    rwlock->numLocks = 0;
+    rwlock->pendingReaders = 0;
+    rwlock->pendingWriters = 0;
+    rwlock->writerThreadId = 0;
+
+    return 0;
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
+{
+    int  ret;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    if (rwlock->numLocks > 0)
+        return EBUSY;
+
+    pthread_cond_destroy(&rwlock->cond);
+    pthread_mutex_destroy(&rwlock->lock);
+    return 0;
+}
+
+/* Returns TRUE iff we can acquire a read lock. */
+static __inline__ int read_precondition(pthread_rwlock_t *rwlock, int  thread_id)
+{
+    /* We can't have the lock if any writer is waiting for it (writer bias).
+     * This tries to avoid writer starvation when multiple readers are racing.
+     */
+    if (rwlock->pendingWriters > 0)
+        return 0;
+
+    /* We can have the lock if there is no writer, or if we write-own it. */
+    /* The second test avoids a self-deadlock in case of buggy code. */
+    if (rwlock->writerThreadId == 0 || rwlock->writerThreadId == thread_id)
+        return 1;
+
+    /* Otherwise, we can't have it */
+    return 0;
+}
+
+/* returns TRUE iff we can acquire a write lock. */
+static __inline__ int write_precondition(pthread_rwlock_t *rwlock, int  thread_id)
+{
+    /* We can get the lock if nobody has it */
+    if (rwlock->numLocks == 0)
+        return 1;
+
+    /* Or if we already own it */
+    if (rwlock->writerThreadId == thread_id)
+        return 1;
+
+    /* Otherwise, not */
+    return 0;
+}
+
+/* This function is used to waken any waiting thread contending
+ * for the lock. One of them should be able to grab it after
+ * that.
+ */
+static void _pthread_rwlock_pulse(pthread_rwlock_t *rwlock)
+{
+    if (rwlock->pendingReaders > 0 || rwlock->pendingWriters > 0)
+        pthread_cond_broadcast(&rwlock->cond);
+}
+
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
+{
+    return pthread_rwlock_timedrdlock(rwlock, NULL);
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
+{
+    int ret = 0;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    pthread_mutex_lock(&rwlock->lock);
+    if (__unlikely(!read_precondition(rwlock, __get_thread_id())))
+        ret = EBUSY;
+    else
+        rwlock->numLocks ++;
+    pthread_mutex_unlock(&rwlock->lock);
+
+    return ret;
+}
+
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *rwlock, const struct timespec *abs_timeout)
+{
+    int thread_id, ret = 0;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    pthread_mutex_lock(&rwlock->lock);
+    thread_id = __get_thread_id();
+    if (__unlikely(!read_precondition(rwlock, thread_id))) {
+        rwlock->pendingReaders += 1;
+        do {
+            ret = pthread_cond_timedwait(&rwlock->cond, &rwlock->lock, abs_timeout);
+        } while (ret == 0 && !read_precondition(rwlock, thread_id));
+        rwlock->pendingReaders -= 1;
+        if (ret != 0)
+            goto EXIT;
+    }
+    rwlock->numLocks ++;
+EXIT:
+    pthread_mutex_unlock(&rwlock->lock);
+    return ret;
+}
+
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
+{
+    return pthread_rwlock_timedwrlock(rwlock, NULL);
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
+{
+    int thread_id, ret = 0;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    pthread_mutex_lock(&rwlock->lock);
+    thread_id = __get_thread_id();
+    if (__unlikely(!write_precondition(rwlock, thread_id))) {
+        ret = EBUSY;
+    } else {
+        rwlock->numLocks ++;
+        rwlock->writerThreadId = thread_id;
+    }
+    pthread_mutex_unlock(&rwlock->lock);
+    return ret;
+}
+
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlock, const struct timespec *abs_timeout)
+{
+    int thread_id, ret = 0;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    pthread_mutex_lock(&rwlock->lock);
+    thread_id = __get_thread_id();
+    if (__unlikely(!write_precondition(rwlock, thread_id))) {
+        /* If we can't write yet, wait until the rwlock is unlocked
+         * and try again. Increment pendingWriters to get the
+         * cond broadcast when that happens.
+         */
+        rwlock->pendingWriters += 1;
+        do {
+            ret = pthread_cond_timedwait(&rwlock->cond, &rwlock->lock, abs_timeout);
+        } while (ret == 0 && !write_precondition(rwlock, thread_id));
+        rwlock->pendingWriters -= 1;
+        if (ret != 0)
+            goto EXIT;
+    }
+    rwlock->numLocks ++;
+    rwlock->writerThreadId = thread_id;
+EXIT:
+    pthread_mutex_unlock(&rwlock->lock);
+    return ret;
+}
+
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
+{
+    int  ret = 0;
+
+    if (rwlock == NULL)
+        return EINVAL;
+
+    pthread_mutex_lock(&rwlock->lock);
+
+    /* The lock must be held */
+    if (rwlock->numLocks == 0) {
+        ret = EPERM;
+        goto EXIT;
+    }
+
+    /* If it has only readers, writerThreadId is 0 */
+    if (rwlock->writerThreadId == 0) {
+        if (--rwlock->numLocks == 0)
+            _pthread_rwlock_pulse(rwlock);
+    }
+    /* Otherwise, it has only a single writer, which
+     * must be ourselves.
+     */
+    else {
+        if (rwlock->writerThreadId != __get_thread_id()) {
+            ret = EPERM;
+            goto EXIT;
+        }
+        if (--rwlock->numLocks == 0) {
+            rwlock->writerThreadId = 0;
+            _pthread_rwlock_pulse(rwlock);
+        }
+    }
+EXIT:
+    pthread_mutex_unlock(&rwlock->lock);
+    return ret;
+}
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index e17e366..6fd47f2 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -44,6 +44,7 @@
 #include <assert.h>
 #include <malloc.h>
 #include <linux/futex.h>
+#include <cutils/atomic-inline.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -939,6 +940,7 @@
         while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
             __futex_syscall4(&mutex->value, wait_op, shared|2, 0);
     }
+    ANDROID_MEMBAR_FULL();
 }
 
 /*
@@ -948,6 +950,8 @@
 static __inline__ void
 _normal_unlock(pthread_mutex_t*  mutex)
 {
+    ANDROID_MEMBAR_FULL();
+
     /* We need to preserve the shared flag during operations */
     int  shared = mutex->value & MUTEX_SHARED_MASK;
 
@@ -1147,8 +1151,10 @@
     /* Handle common case first */
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0)
+        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+            ANDROID_MEMBAR_FULL();
             return 0;
+        }
 
         return EBUSY;
     }
@@ -1244,9 +1250,11 @@
     {
         int  wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
 
-        /* fast path for unconteded lock */
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0)
+        /* fast path for uncontended lock */
+        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+            ANDROID_MEMBAR_FULL();
             return 0;
+        }
 
         /* loop while needed */
         while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) {
@@ -1255,6 +1263,7 @@
 
             __futex_syscall4(&mutex->value, wait_op, shared|2, &ts);
         }
+        ANDROID_MEMBAR_FULL();
         return 0;
     }
 
diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c
index 84b9314..b624943 100644
--- a/libc/bionic/semaphore.c
+++ b/libc/bionic/semaphore.c
@@ -30,6 +30,7 @@
 #include <sys/time.h>
 #include <sys/atomics.h>
 #include <time.h>
+#include <cutils/atomic-inline.h>
 
 int sem_init(sem_t *sem, int pshared, unsigned int value)
 {
@@ -103,6 +104,7 @@
     return old;
 }
 
+/* lock a semaphore */
 int sem_wait(sem_t *sem)
 {
     if (sem == NULL) {
@@ -116,6 +118,7 @@
 
         __futex_wait(&sem->count, 0, 0);
     }
+    ANDROID_MEMBAR_FULL();
     return 0;
 }
 
@@ -130,8 +133,10 @@
 
     /* POSIX says we need to try to decrement the semaphore
      * before checking the timeout value */
-    if (__atomic_dec_if_positive(&sem->count))
+    if (__atomic_dec_if_positive(&sem->count)) {
+        ANDROID_MEMBAR_FULL();
         return 0;
+    }
 
     /* check it as per Posix */
     if (abs_timeout == NULL    ||
@@ -169,17 +174,21 @@
             return -1;
         }
 
-        if (__atomic_dec_if_positive(&sem->count))
+        if (__atomic_dec_if_positive(&sem->count)) {
+            ANDROID_MEMBAR_FULL();
             break;
+        }
     }
     return 0;
 }
 
+/* unlock a semaphore */
 int sem_post(sem_t *sem)
 {
     if (sem == NULL)
         return EINVAL;
 
+    ANDROID_MEMBAR_FULL();
     if (__atomic_inc((volatile int*)&sem->count) >= 0)
         __futex_wake(&sem->count, 1);
 
@@ -194,6 +203,7 @@
     }
 
     if (__atomic_dec_if_positive(&sem->count) > 0) {
+        ANDROID_MEMBAR_FULL();
         return 0;
     } else {
         errno = EAGAIN;
diff --git a/libc/docs/CHANGES.TXT b/libc/docs/CHANGES.TXT
index 9080685..a66e737 100644
--- a/libc/docs/CHANGES.TXT
+++ b/libc/docs/CHANGES.TXT
@@ -1,7 +1,12 @@
 Bionic ChangeLog:
 -----------------
 
-Differences between current and Android 2.1:
+Differences between current and Android 2.2:
+
+- <pthread.h>: Add reader/writer locks implementation.
+
+-------------------------------------------------------------------------------
+Differences between Android 2.2 and Android 2.1:
 
 - Add support for SH-4 CPU architecture !
 
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 944bb68..9773dcb 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -219,6 +219,41 @@
  */
 int pthread_mutex_lock_timeout_np(pthread_mutex_t *mutex, unsigned msecs);
 
+/* read-write lock support */
+
+typedef int pthread_rwlockattr_t;
+
+typedef struct {
+    pthread_mutex_t  lock;
+    pthread_cond_t   cond;
+    int              numLocks;
+    int              writerThreadId;
+    int              pendingReaders;
+    int              pendingWriters;
+    void*            reserved[4];  /* for future extensibility */
+} pthread_rwlock_t;
+/* Static initializer: one entry per pthread_rwlock_t field, in declaration order. */
+#define PTHREAD_RWLOCK_INITIALIZER  { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0, 0, { NULL, NULL, NULL, NULL } }
+
+int pthread_rwlockattr_init(pthread_rwlockattr_t *attr);
+int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr);
+int pthread_rwlockattr_setpshared(pthread_rwlockattr_t *attr, int  pshared);
+int pthread_rwlockattr_getpshared(pthread_rwlockattr_t *attr, int *pshared);
+
+int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *rwlock, const struct timespec *abs_timeout);
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlock, const struct timespec *abs_timeout);
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
+
+
 int pthread_key_create(pthread_key_t *key, void (*destructor_function)(void *));
 int pthread_key_delete (pthread_key_t);
 int pthread_setspecific(pthread_key_t key, const void *value);