Support priority inheritance mutex in 64bit programs.

Bug: http://b/29177606
Test: run bionic-unit-tests on walleye.
Test: run bionic-unit-tests-glibc on host.
Change-Id: Iac349284aa73515f384e7509445f87434757f59e
diff --git a/libc/bionic/pthread_mutex.cpp b/libc/bionic/pthread_mutex.cpp
index 14e0ab0..ed90639 100644
--- a/libc/bionic/pthread_mutex.cpp
+++ b/libc/bionic/pthread_mutex.cpp
@@ -49,9 +49,13 @@
  * bits:     name       description
  * 0-3       type       type of mutex
  * 4         shared     process-shared flag
+ * 5         protocol   whether it is a priority inheritance mutex.
  */
 #define  MUTEXATTR_TYPE_MASK   0x000f
 #define  MUTEXATTR_SHARED_MASK 0x0010
+#define MUTEXATTR_PROTOCOL_MASK 0x0020
+
+#define MUTEXATTR_PROTOCOL_SHIFT 5
 
 int pthread_mutexattr_init(pthread_mutexattr_t *attr)
 {
@@ -113,17 +117,119 @@
     return 0;
 }
 
-/* a mutex contains a state value and a owner_tid.
- * The value is implemented as a 16-bit integer holding the following fields:
- *
- * bits:     name     description
- * 15-14     type     mutex type
- * 13        shared   process-shared flag
- * 12-2      counter  counter of recursive mutexes
- * 1-0       state    lock state (0, 1 or 2)
- *
- * The owner_tid is used only in recursive and errorcheck mutex to hold the mutex owner thread tid.
- */
+// Sets the protocol bit of *attr; only PTHREAD_PRIO_NONE and PTHREAD_PRIO_INHERIT are accepted.
+int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
+    if (!(protocol == PTHREAD_PRIO_NONE || protocol == PTHREAD_PRIO_INHERIT)) return EINVAL;
+    const pthread_mutexattr_t kept_bits = *attr & ~MUTEXATTR_PROTOCOL_MASK;
+    *attr = kept_bits | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
+    return 0;
+}
+
+int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
+    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;  // bit 5 only
+    return 0;  // Always succeeds; neither pointer is null-checked.
+}
+
+#if defined(__LP64__)
+
+// Priority Inheritance mutex implementation
+struct PIMutex {
+  // Mutex type: 0 (normal), 1 (recursive) or 2 (errorcheck); constant during lifetime.
+  uint8_t type;
+  // Process-shared flag; constant during lifetime.
+  bool shared;
+  // <number of times the owner thread has locked a recursive PI mutex> - 1
+  uint16_t counter;
+  // owner_tid is read and written by both userspace and kernel code. It holds three fields:
+  // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK.
+  atomic_int owner_tid;
+};
+
+// Try to lock a PI mutex without blocking: returns 0, EBUSY, EDEADLK or EAGAIN.
+static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
+    const pid_t self = __get_thread()->tid;
+    // Fast path: the mutex is unlocked (owner_tid == 0), so claim it by storing our tid.
+    int observed = 0;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &observed, self,
+                                                               memory_order_acquire,
+                                                               memory_order_relaxed))) {
+        return 0;
+    }
+    if ((observed & FUTEX_TID_MASK) != self) {
+        // Not owned by the calling thread.
+        return EBUSY;
+    }
+    // We already own this mutex; the result depends on the mutex type.
+    switch (mutex.type) {
+        case PTHREAD_MUTEX_NORMAL:
+            return EBUSY;
+        case PTHREAD_MUTEX_ERRORCHECK:
+            return EDEADLK;
+    }
+    // Recursive re-lock: counter holds (lock count - 1), so fail once it is saturated.
+    if (mutex.counter == 0xffff) return EAGAIN;
+    mutex.counter++;
+    return 0;
+}
+
+// Lock a PI mutex: try the userspace fast path, then fall back to __futex_pi_lock_ex().
+static int PIMutexTimedLock(PIMutex& mutex, const timespec* abs_timeout) {
+    const int try_result = PIMutexTryLock(mutex);
+    if (__predict_true(try_result == 0)) {
+        return 0;
+    }
+    // Only EBUSY is handed to the futex; other errors (EDEADLK, EAGAIN) go straight to the caller.
+    if (try_result != EBUSY) return try_result;
+    return -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, true, abs_timeout);
+}
+
+// Unlock a PI mutex: 0, EPERM (caller isn't the owner), or the negated __futex_pi_unlock() result.
+static int PIMutexUnlock(PIMutex& mutex) {
+    pid_t tid = __get_thread()->tid;
+    int old_owner = tid;
+    // Handle common case first.
+    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
+        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
+                                                                   &old_owner, 0,
+                                                                   memory_order_release,
+                                                                   memory_order_relaxed))) {
+            return 0;
+        }
+    } else {
+        // No CAS ran above, so load the real owner word for the checks below.
+        old_owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed);
+    }
+    if (tid != (old_owner & FUTEX_TID_MASK)) {
+        // The mutex can only be unlocked by the thread who owns it.
+        return EPERM;
+    }
+    if (mutex.type == PTHREAD_MUTEX_RECURSIVE && mutex.counter != 0u) {
+        // Still held recursively: drop one level and keep ownership.
+        --mutex.counter;
+        return 0;
+    }
+    // No flag bits set means no thread is waiting: try to release without __futex_pi_unlock().
+    if (old_owner == tid &&
+        __predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0,
+                                                               memory_order_release,
+                                                               memory_order_relaxed))) {
+        return 0;
+    }
+    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
+}
+
+// Destroy a PI mutex: returns 0 if it was unlocked, EBUSY otherwise.
+static int PIMutexDestroy(PIMutex& mutex) {
+    // The mutex should be unlocked (owner_tid == 0) when destroyed. If so, owner_tid is
+    // atomically replaced with 0xffffffff to make the mutex unusable.
+    int expected_unlocked = 0;
+    const bool poisoned = atomic_compare_exchange_strong_explicit(
+        &mutex.owner_tid, &expected_unlocked, 0xffffffff, memory_order_relaxed,
+        memory_order_relaxed);
+    return poisoned ? 0 : EBUSY;
+}
+#endif  // defined(__LP64__)
+
 
 /* Convenience macro, creates a mask of 'bits' bits that starts from
  * the 'shift'-th least significant bit in a 32-bit word.
@@ -139,7 +245,6 @@
 /* And this one does the opposite, i.e. extract a field's value from a bit pattern */
 #define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
 
-
 /* Convenience macros.
  *
  * These are used to form or modify the bit pattern of a given mutex value
@@ -214,13 +319,47 @@
 #define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
 #define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
 #define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
+// Use a special mutex type to mark priority inheritance mutexes.
+#define  MUTEX_TYPE_BITS_WITH_PI     MUTEX_TYPE_TO_BITS(3)
 
+// A PI mutex includes the following fields:
+//   Atomic(uint16_t) state;
+//   PIMutex pi_mutex;
+//
+//   state holds the following fields:
+//
+//   bits:   name    description
+//   15-14   type    mutex type, should be 3
+//
+//   pi_mutex holds the state of a PI mutex.
+//
+// A Non-PI mutex includes the following fields:
+//   Atomic(uint16_t) state;
+//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
+//
+//   state holds the following fields:
+//
+//   bits:     name     description
+//   15-14     type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
+//   13        shared   process-shared flag
+//   12-2      counter  <number of times the owner has locked a recursive Non-PI mutex> - 1
+//   1-0       state    lock state (0, 1 or 2)
+//
+//   bits 15-13 are constant during the lifetime of the mutex.
+//
+//   owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner
+//   thread id.
+//
+// PI mutexes and Non-PI mutexes are distinguished by checking the type field in state.
 struct pthread_mutex_internal_t {
   _Atomic(uint16_t) state;
 #if defined(__LP64__)
   uint16_t __pad;
-  atomic_int owner_tid;
-  char __reserved[32];
+  union {
+    atomic_int owner_tid;
+    PIMutex pi_mutex;
+  };
+  char __reserved[28];
 #else
   _Atomic(uint16_t) owner_tid;
 #endif
@@ -267,13 +406,26 @@
         return EINVAL;
     }
 
-    atomic_init(&mutex->state, state);
-    atomic_init(&mutex->owner_tid, 0);
+    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
+#if defined(__LP64__)
+        atomic_init(&mutex->state, MUTEX_TYPE_BITS_WITH_PI);
+        mutex->pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
+        mutex->pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
+#else
+        return EINVAL;
+#endif
+    } else {
+        atomic_init(&mutex->state, state);
+        atomic_init(&mutex->owner_tid, 0);
+    }
     return 0;
 }
 
-static inline __always_inline int __pthread_normal_mutex_trylock(pthread_mutex_internal_t* mutex,
-                                                                 uint16_t shared) {
+// namespace for Non-PI mutex routines.
+namespace NonPI {
+
+static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
+                                                     uint16_t shared) {
     const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
     const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 
@@ -286,7 +438,7 @@
 }
 
 /*
- * Lock a mutex of type NORMAL.
+ * Lock a normal Non-PI mutex.
  *
  * As noted above, there are three states:
  *   0 (unlocked, no contention)
@@ -297,11 +449,11 @@
  * "type" value is zero, so the only bits that will be set are the ones in
  * the lock state field.
  */
-static inline __always_inline int __pthread_normal_mutex_lock(pthread_mutex_internal_t* mutex,
-                                                              uint16_t shared,
-                                                              bool use_realtime_clock,
-                                                              const timespec* abs_timeout_or_null) {
-    if (__predict_true(__pthread_normal_mutex_trylock(mutex, shared) == 0)) {
+static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
+                                                  uint16_t shared,
+                                                  bool use_realtime_clock,
+                                                  const timespec* abs_timeout_or_null) {
+    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
         return 0;
     }
     int result = check_timespec(abs_timeout_or_null, true);
@@ -333,11 +485,11 @@
 }
 
 /*
- * Release a normal mutex.  The caller is responsible for determining
+ * Release a normal Non-PI mutex.  The caller is responsible for determining
  * that we are in fact the owner of this lock.
  */
-static inline __always_inline void __pthread_normal_mutex_unlock(pthread_mutex_internal_t* mutex,
-                                                                 uint16_t shared) {
+static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
+                                                     uint16_t shared) {
     const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
     const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
 
@@ -370,14 +522,14 @@
     }
 }
 
-/* This common inlined function is used to increment the counter of a recursive mutex.
+/* This common inlined function is used to increment the counter of a recursive Non-PI mutex.
  *
  * If the counter overflows, it will return EAGAIN.
  * Otherwise, it atomically increments the counter and returns 0.
  *
  */
-static inline __always_inline int __recursive_increment(pthread_mutex_internal_t* mutex,
-                                                        uint16_t old_state) {
+static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
+                                                     uint16_t old_state) {
     // Detect recursive lock overflow and return EAGAIN.
     // This is safe because only the owner thread can modify the
     // counter bits in the mutex value.
@@ -387,17 +539,17 @@
 
     // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
     // but the mutex counter will not overflow. So we use atomic_fetch_add operation here.
-    // The mutex is still locked by current thread, so we don't need a release fence.
+    // The mutex is already locked by current thread, so we don't need an acquire fence.
     atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
     return 0;
 }
 
-static inline __always_inline int __recursive_or_errorcheck_mutex_wait(
-                                                      pthread_mutex_internal_t* mutex,
-                                                      uint16_t shared,
-                                                      uint16_t old_state,
-                                                      bool use_realtime_clock,
-                                                      const timespec* abs_timeout) {
+// Wait on a recursive or errorcheck Non-PI mutex.
+static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
+                                                                 uint16_t shared,
+                                                                 uint16_t old_state,
+                                                                 bool use_realtime_clock,
+                                                                 const timespec* abs_timeout) {
 // __futex_wait always waits on a 32-bit value. But state is 16-bit. For a normal mutex, the owner_tid
 // field in mutex is not used. On 64-bit devices, the __pad field in mutex is not used.
 // But when a recursive or errorcheck mutex is used on 32-bit devices, we need to add the
@@ -418,16 +570,16 @@
 #endif
 }
 
-static int __pthread_mutex_lock_with_timeout(pthread_mutex_internal_t* mutex,
-                                             bool use_realtime_clock,
-                                             const timespec* abs_timeout_or_null) {
+// Lock a Non-PI mutex.
+static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
+                                const timespec* abs_timeout_or_null) {
     uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
     uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
     uint16_t shared = (old_state & MUTEX_SHARED_MASK);
 
     // Handle common case first.
     if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
-        return __pthread_normal_mutex_lock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
+        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
     }
 
     // Do we already own this recursive or error-check mutex?
@@ -436,7 +588,7 @@
         if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
             return EDEADLK;
         }
-        return __recursive_increment(mutex, old_state);
+        return RecursiveIncrement(mutex, old_state);
     }
 
     const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
@@ -492,14 +644,16 @@
             return result;
         }
         // We are in locked_contended state, sleep until someone wakes us up.
-        if (__recursive_or_errorcheck_mutex_wait(mutex, shared, old_state, use_realtime_clock,
-                                                 abs_timeout_or_null) == -ETIMEDOUT) {
+        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
+                                           abs_timeout_or_null) == -ETIMEDOUT) {
             return ETIMEDOUT;
         }
         old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
     }
 }
 
+}  // namespace NonPI
+
 int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
 #if !defined(__LP64__)
     // Some apps depend on being able to pass NULL as a mutex and get EINVAL
@@ -517,11 +671,16 @@
     uint16_t shared = (old_state & MUTEX_SHARED_MASK);
     // Avoid slowing down fast path of normal mutex lock operation.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-      if (__predict_true(__pthread_normal_mutex_trylock(mutex, shared) == 0)) {
+      if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
         return 0;
       }
     }
-    return __pthread_mutex_lock_with_timeout(mutex, false, nullptr);
+#if defined(__LP64__)
+    if (mtype == MUTEX_TYPE_BITS_WITH_PI) {
+        return PIMutexTimedLock(mutex->pi_mutex, nullptr);
+    }
+#endif
+    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
 }
 
 int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
@@ -542,9 +701,14 @@
 
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        __pthread_normal_mutex_unlock(mutex, shared);
+        NonPI::NormalMutexUnlock(mutex, shared);
         return 0;
     }
+#if defined(__LP64__)
+    if (mtype == MUTEX_TYPE_BITS_WITH_PI) {
+        return PIMutexUnlock(mutex->pi_mutex);
+    }
+#endif
 
     // Do we already own this recursive or error-check mutex?
     pid_t tid = __get_thread()->tid;
@@ -582,15 +746,17 @@
 
     uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
     uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
-    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
-
-    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
-    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        return __pthread_normal_mutex_trylock(mutex, shared);
+        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
+        return NonPI::NormalMutexTryLock(mutex, shared);
     }
+#if defined(__LP64__)
+    if (mtype == MUTEX_TYPE_BITS_WITH_PI) {
+        return PIMutexTryLock(mutex->pi_mutex);
+    }
+#endif
 
     // Do we already own this recursive or error-check mutex?
     pid_t tid = __get_thread()->tid;
@@ -598,9 +764,13 @@
         if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
             return EBUSY;
         }
-        return __recursive_increment(mutex, old_state);
+        return NonPI::RecursiveIncrement(mutex, old_state);
     }
 
+    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
+    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+
     // Same as pthread_mutex_lock, except that we don't want to wait, and
     // the only operation that can succeed is a single compare_exchange to acquire the
     // lock if it is released / not owned by anyone. No need for a complex loop.
@@ -623,8 +793,8 @@
     timespec_from_ms(ts, ms);
     timespec abs_timeout;
     absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
-    int error = __pthread_mutex_lock_with_timeout(__get_internal_mutex(mutex_interface),
-                                                  false, &abs_timeout);
+    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
+                                            &abs_timeout);
     if (error == ETIMEDOUT) {
         error = EBUSY;
     }
@@ -633,13 +803,33 @@
 #endif
 
 int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const timespec* abs_timeout) {
-    return __pthread_mutex_lock_with_timeout(__get_internal_mutex(mutex_interface),
-                                             true, abs_timeout);
+    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
+    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
+    // Handle common case first: try NormalMutexTryLock() on a normal Non-PI mutex.
+    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
+        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
+        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
+            return 0;
+        }
+    }
+#if defined(__LP64__)  // PIMutex is only defined for LP64 builds.
+    if (mtype == MUTEX_TYPE_BITS_WITH_PI) {
+        return PIMutexTimedLock(mutex->pi_mutex, abs_timeout);
+    }
+#endif
+    return NonPI::MutexLockWithTimeout(mutex, true, abs_timeout);  // true: use_realtime_clock
 }
 
 int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
     pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
     uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+#if defined(__LP64__)
+    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
+    if (mtype == MUTEX_TYPE_BITS_WITH_PI) {
+        return PIMutexDestroy(mutex->pi_mutex);
+    }
+#endif
     // Store 0xffff to make the mutex unusable. Although POSIX standard says it is undefined
     // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation.
     if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&