<bionic/macros.h>: replace align_up()/align_down() with clang's builtins.

Change-Id: If39cf028e9a22fc2008ee9d1ba683a4d0d038325
diff --git a/libc/bionic/elf_note.cpp b/libc/bionic/elf_note.cpp
index 9cc6b21..efe3844 100644
--- a/libc/bionic/elf_note.cpp
+++ b/libc/bionic/elf_note.cpp
@@ -47,11 +47,11 @@
     const ElfW(Nhdr)* note = reinterpret_cast<const ElfW(Nhdr)*>(p);
     p += sizeof(ElfW(Nhdr));
     const char* name = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_namesz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_namesz, 4), &p)) {
       return false;
     }
     const char* desc = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_descsz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_descsz, 4), &p)) {
       return false;
     }
     if (p > note_end) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 3fa8ee6..1bd2da7 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -129,7 +129,7 @@
   // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
   // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
   char* scs_aligned_guard_region =
-      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
+      reinterpret_cast<char*>(__builtin_align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
 
   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
@@ -296,7 +296,7 @@
   // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
+  stack_top = __builtin_align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   if (!stack_clean) {
diff --git a/libc/platform/bionic/macros.h b/libc/platform/bionic/macros.h
index 1e7ca88..c4af3b9 100644
--- a/libc/platform/bionic/macros.h
+++ b/libc/platform/bionic/macros.h
@@ -32,24 +32,6 @@
     ? (1UL << (64 - __builtin_clzl(static_cast<unsigned long>(value)))) \
     : (1UL << (32 - __builtin_clz(static_cast<unsigned int>(value)))))
 
-static constexpr uintptr_t align_down(uintptr_t p, size_t align) {
-  return p & ~(align - 1);
-}
-
-static constexpr uintptr_t align_up(uintptr_t p, size_t align) {
-  return (p + align - 1) & ~(align - 1);
-}
-
-template <typename T>
-static inline T* _Nonnull align_down(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_down(reinterpret_cast<uintptr_t>(p), align));
-}
-
-template <typename T>
-static inline T* _Nonnull align_up(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_up(reinterpret_cast<uintptr_t>(p), align));
-}
-
 #if defined(__arm__)
 #define BIONIC_STOP_UNWIND asm volatile(".cfi_undefined r14")
 #elif defined(__aarch64__)
diff --git a/libc/private/CFIShadow.h b/libc/private/CFIShadow.h
index cbdf0f7..b40c063 100644
--- a/libc/private/CFIShadow.h
+++ b/libc/private/CFIShadow.h
@@ -40,7 +40,7 @@
 // below) are interpreted as follows.
 //
 // For an address P and corresponding shadow value V, the address of __cfi_check is calculated as
-//   align_up(P, 2**kShadowGranularity) - (V - 2) * (2 ** kCfiCheckGranularity)
+//   __builtin_align_up(P, 2**kShadowGranularity) - (V - 2) * (2 ** kCfiCheckGranularity)
 //
 // Special shadow values:
 //        0 = kInvalidShadow, this memory range has no valid CFI targets.