<bionic/macros.h>: replace align_up()/align_down() with clang's builtins.

Change-Id: If39cf028e9a22fc2008ee9d1ba683a4d0d038325
diff --git a/libc/bionic/elf_note.cpp b/libc/bionic/elf_note.cpp
index 9cc6b21..efe3844 100644
--- a/libc/bionic/elf_note.cpp
+++ b/libc/bionic/elf_note.cpp
@@ -47,11 +47,11 @@
     const ElfW(Nhdr)* note = reinterpret_cast<const ElfW(Nhdr)*>(p);
     p += sizeof(ElfW(Nhdr));
     const char* name = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_namesz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_namesz, 4), &p)) {
       return false;
     }
     const char* desc = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_descsz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_descsz, 4), &p)) {
       return false;
     }
     if (p > note_end) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 3fa8ee6..1bd2da7 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -129,7 +129,7 @@
   // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
   // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
   char* scs_aligned_guard_region =
-      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
+      reinterpret_cast<char*>(__builtin_align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
 
   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
@@ -296,7 +296,7 @@
   // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
+  stack_top = __builtin_align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   if (!stack_clean) {