Add _monotonic_np versions of timed wait functions

As a follow-up to Ibba98f5d88be1c306d14e9b9366302ecbef6d534, where we
added a workaround to convert the CLOCK_REALTIME timeouts to
CLOCK_MONOTONIC for pthread and semaphore timed wait functions, we're
introducing a set of _monotonic_np versions of each of these functions
that wait on CLOCK_MONOTONIC directly.

The primary motivation here is that while the above workaround helps
for 3rd party code, it creates a dilemma when implementing new code
that would use these functions: either one implements code with these
functions knowing that a race condition is possible, or one avoids
these functions and reinvents one's own waiting/signaling mechanisms.
Neither is satisfactory, so we create a third option: use these
Android-specific _monotonic_np functions, which completely remove the
race condition while keeping the rest of the interface.

Specifically this adds the below functions:
pthread_mutex_timedlock_monotonic_np()
pthread_cond_timedwait_monotonic_np()
pthread_rwlock_timedrdlock_monotonic_np()
pthread_rwlock_timedwrlock_monotonic_np()
sem_timedwait_monotonic_np()

Note that pthread_cond_timedwait_monotonic_np() previously existed and
was removed since it's possible to initialize a condition variable to
use CLOCK_MONOTONIC.  It is added back for a mix of reasons:
1) Symmetry with the rest of the functions we're adding.
2) libc++ cannot easily take advantage of the new initializer, but
   will be able to use this function in order to wait on
   std::chrono::steady_clock.
3) Frankly, it's a better API to specify the clock in the waiter function
   than to specify the clock when the condition variable is
   initialized.

Bug: 73951740
Test: new unit tests
Change-Id: I23aa5c204e36a194237d41e064c5c8ccaa4204e3
diff --git a/libc/bionic/pthread_mutex.cpp b/libc/bionic/pthread_mutex.cpp
index 9f9a0c2..7f48972 100644
--- a/libc/bionic/pthread_mutex.cpp
+++ b/libc/bionic/pthread_mutex.cpp
@@ -176,6 +176,7 @@
 // Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
 // ARM/ARM64, which increases at most 20 percent overhead. So make it noinline.
 static int  __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
+                                                       bool use_realtime_clock,
                                                        const timespec* abs_timeout) {
     int ret = PIMutexTryLock(mutex);
     if (__predict_true(ret == 0)) {
@@ -183,7 +184,7 @@
     }
     if (ret == EBUSY) {
         ScopedTrace trace("Contending for pthread mutex");
-        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, true, abs_timeout);
+        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
     }
     return ret;
 }
@@ -820,7 +821,7 @@
         if (__predict_true(PIMutexTryLock(m) == 0)) {
             return 0;
         }
-        return PIMutexTimedLock(mutex->ToPIMutex(), nullptr);
+        return PIMutexTimedLock(mutex->ToPIMutex(), false, nullptr);
     }
     if (__predict_false(IsMutexDestroyed(old_state))) {
         return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
@@ -948,7 +949,8 @@
 }
 #endif
 
-int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const timespec* abs_timeout) {
+static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
+                                     const timespec* abs_timeout, const char* function) {
     pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
     uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
     uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
@@ -960,12 +962,21 @@
         }
     }
     if (old_state == PI_MUTEX_STATE) {
-        return PIMutexTimedLock(mutex->ToPIMutex(), abs_timeout);
+        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
     }
     if (__predict_false(IsMutexDestroyed(old_state))) {
-        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
+        return HandleUsingDestroyedMutex(mutex_interface, function);
     }
-    return NonPI::MutexLockWithTimeout(mutex, true, abs_timeout);
+    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
+}
+
+int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
+    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
+}
+
+int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
+                                         const struct timespec* abs_timeout) {
+    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
 }
 
 int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {