<bionic/macros.h>: replace align_up()/align_down() with clang's builtins.
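
Clang's __builtin_align_up()/__builtin_align_down() accept either an
integer or a pointer as their first argument and return the same type
they were given, so they replace both the uintptr_t helpers and the
pointer template wrappers we had in <bionic/macros.h>. They also
reject a constant alignment that isn't a power of 2 at compile time,
whereas the old helpers silently computed the wrong answer for
non-power-of-2 alignments.

A rough illustration of the builtins' semantics (not part of this
change, and assuming a toolchain new enough to have them, clang 10+):

  #include <cassert>
  #include <cstdint>

  int main() {
    uintptr_t p = 0x1003;
    assert(__builtin_align_up(p, 16) == 0x1010);
    assert(__builtin_align_down(p, 16) == 0x1000);
    // Pointer arguments keep their pointee type, which is what made
    // the template wrappers unnecessary.
    char buf[32];
    char* q = __builtin_align_up(buf + 1, 8);
    assert(__builtin_is_aligned(q, 8));
  }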

Change-Id: If39cf028e9a22fc2008ee9d1ba683a4d0d038325
diff --git a/libc/bionic/elf_note.cpp b/libc/bionic/elf_note.cpp
index 9cc6b21..efe3844 100644
--- a/libc/bionic/elf_note.cpp
+++ b/libc/bionic/elf_note.cpp
@@ -47,11 +47,11 @@
     const ElfW(Nhdr)* note = reinterpret_cast<const ElfW(Nhdr)*>(p);
     p += sizeof(ElfW(Nhdr));
     const char* name = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_namesz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_namesz, 4), &p)) {
       return false;
     }
     const char* desc = reinterpret_cast<const char*>(p);
-    if (__builtin_add_overflow(p, align_up(note->n_descsz, 4), &p)) {
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_descsz, 4), &p)) {
       return false;
     }
     if (p > note_end) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 3fa8ee6..1bd2da7 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -129,7 +129,7 @@
   // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
   // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
-  char* scs_aligned_guard_region =
-      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
+  char* scs_aligned_guard_region = reinterpret_cast<char*>(
+      __builtin_align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
 
   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
@@ -296,7 +296,7 @@
   // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
+  stack_top = __builtin_align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   if (!stack_clean) {
diff --git a/libc/platform/bionic/macros.h b/libc/platform/bionic/macros.h
index 1e7ca88..c4af3b9 100644
--- a/libc/platform/bionic/macros.h
+++ b/libc/platform/bionic/macros.h
@@ -32,24 +32,6 @@
     ? (1UL << (64 - __builtin_clzl(static_cast<unsigned long>(value)))) \
     : (1UL << (32 - __builtin_clz(static_cast<unsigned int>(value)))))
 
-static constexpr uintptr_t align_down(uintptr_t p, size_t align) {
-  return p & ~(align - 1);
-}
-
-static constexpr uintptr_t align_up(uintptr_t p, size_t align) {
-  return (p + align - 1) & ~(align - 1);
-}
-
-template <typename T>
-static inline T* _Nonnull align_down(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_down(reinterpret_cast<uintptr_t>(p), align));
-}
-
-template <typename T>
-static inline T* _Nonnull align_up(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_up(reinterpret_cast<uintptr_t>(p), align));
-}
-
 #if defined(__arm__)
 #define BIONIC_STOP_UNWIND asm volatile(".cfi_undefined r14")
 #elif defined(__aarch64__)
diff --git a/libc/private/CFIShadow.h b/libc/private/CFIShadow.h
index cbdf0f7..b40c063 100644
--- a/libc/private/CFIShadow.h
+++ b/libc/private/CFIShadow.h
@@ -40,7 +40,8 @@
 // below) are interpreted as follows.
 //
 // For an address P and corresponding shadow value V, the address of __cfi_check is calculated as
-//   align_up(P, 2**kShadowGranularity) - (V - 2) * (2 ** kCfiCheckGranularity)
+//   __builtin_align_up(P, 2**kShadowGranularity) - (V - 2) * (2**kCfiCheckGranularity)
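+//   (e.g. if both granularities were 12, P = 0x41234 and V = 2 would give 0x42000)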
 //
 // Special shadow values:
 //        0 = kInvalidShadow, this memory range has no valid CFI targets.
diff --git a/libdl/libdl_cfi.cpp b/libdl/libdl_cfi.cpp
index 8adc342..e096f9a 100644
--- a/libdl/libdl_cfi.cpp
+++ b/libdl/libdl_cfi.cpp
@@ -55,8 +55,10 @@
   uintptr_t addr = reinterpret_cast<uintptr_t>(Ptr);
   // The aligned range of [0, kShadowAlign) uses a single shadow element, therefore all pointers in
   // this range must get the same aligned_addr below. This matches CFIShadowWriter::Add; not the
-  // same as align_up().
-  uintptr_t aligned_addr = align_down(addr, CFIShadow::kShadowAlign) + CFIShadow::kShadowAlign;
+  // same as just __builtin_align_up().
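+  // E.g. addr == 0 and addr == kShadowAlign - 1 both map to kShadowAlign here.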
+  uintptr_t aligned_addr =
+      __builtin_align_down(addr, CFIShadow::kShadowAlign) + CFIShadow::kShadowAlign;
   uintptr_t p = aligned_addr - (static_cast<uintptr_t>(v - CFIShadow::kRegularShadowMin)
                                 << CFIShadow::kCfiCheckGranularity);
 #ifdef __arm__
diff --git a/linker/linker_note_gnu_property.cpp b/linker/linker_note_gnu_property.cpp
index 082a604..d221b8d 100644
--- a/linker/linker_note_gnu_property.cpp
+++ b/linker/linker_note_gnu_property.cpp
@@ -137,7 +137,8 @@
     // Loop on program property array.
     const ElfW(Prop)* property = reinterpret_cast<const ElfW(Prop)*>(&note_nhdr->n_desc[offset]);
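+    // Each element of the property array is padded to ElfW(Addr) alignment.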
     const ElfW(Word) property_size =
-        align_up(sizeof(ElfW(Prop)) + property->pr_datasz, sizeof(ElfW(Addr)));
+        __builtin_align_up(sizeof(ElfW(Prop)) + property->pr_datasz, sizeof(ElfW(Addr)));
     if ((note_nhdr->nhdr.n_descsz - offset) < property_size) {
       DL_ERR_AND_LOG(
           "\"%s\" .note.gnu.property: property descriptor size is "
diff --git a/linker/linker_note_gnu_property_test.cpp b/linker/linker_note_gnu_property_test.cpp
index 960118c..2a5eddc 100644
--- a/linker/linker_note_gnu_property_test.cpp
+++ b/linker/linker_note_gnu_property_test.cpp
@@ -107,7 +107,7 @@
   template <typename T>
   bool push(ElfW(Word) pr_type, ElfW(Word) pr_datasz, const T* pr_data) {
     // Must be aligned.
-    const uintptr_t addition = align_up(pr_datasz, sizeof(ElfW(Addr)));
+    const uintptr_t addition = __builtin_align_up(pr_datasz, sizeof(ElfW(Addr)));
     if ((offset() + addition) > kMaxSectionSize) {
       return false;
     }
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 8bcd76c..f3b0f3d 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -606,23 +606,23 @@
   // page size of the platform.
 #if defined(__LP64__)
   constexpr size_t kGapAlignment = 2 * 1024 * 1024;
-#else
-  constexpr size_t kGapAlignment = 0;
 #endif
   // Maximum gap size, in the units of kGapAlignment.
   constexpr size_t kMaxGapUnits = 32;
   // Allocate enough space so that the end of the desired region aligned up is still inside the
   // mapping.
-  size_t mmap_size = align_up(size, mapping_align) + mapping_align - page_size();
+  size_t mmap_size = __builtin_align_up(size, mapping_align) + mapping_align - page_size();
   uint8_t* mmap_ptr =
       reinterpret_cast<uint8_t*>(mmap(nullptr, mmap_size, PROT_NONE, mmap_flags, -1, 0));
   if (mmap_ptr == MAP_FAILED) {
     return nullptr;
   }
   size_t gap_size = 0;
-  size_t first_byte = reinterpret_cast<size_t>(align_up(mmap_ptr, mapping_align));
-  size_t last_byte = reinterpret_cast<size_t>(align_down(mmap_ptr + mmap_size, mapping_align) - 1);
-  if (kGapAlignment && first_byte / kGapAlignment != last_byte / kGapAlignment) {
+  size_t first_byte = reinterpret_cast<size_t>(__builtin_align_up(mmap_ptr, mapping_align));
+  size_t last_byte =
+      reinterpret_cast<size_t>(__builtin_align_down(mmap_ptr + mmap_size, mapping_align) - 1);
+#if defined(__LP64__)
+  if (first_byte / kGapAlignment != last_byte / kGapAlignment) {
     // This library crosses a 2MB boundary and will fragment a new huge page.
     // Lets take advantage of that and insert a random number of inaccessible huge pages before that
     // to improve address randomization and make it harder to locate this library code by probing.
@@ -630,23 +630,25 @@
     mapping_align = std::max(mapping_align, kGapAlignment);
     gap_size =
         kGapAlignment * (is_first_stage_init() ? 1 : arc4random_uniform(kMaxGapUnits - 1) + 1);
-    mmap_size = align_up(size + gap_size, mapping_align) + mapping_align - page_size();
+    mmap_size = __builtin_align_up(size + gap_size, mapping_align) + mapping_align - page_size();
     mmap_ptr = reinterpret_cast<uint8_t*>(mmap(nullptr, mmap_size, PROT_NONE, mmap_flags, -1, 0));
     if (mmap_ptr == MAP_FAILED) {
       return nullptr;
     }
   }
+#endif
 
-  uint8_t *gap_end, *gap_start;
+  uint8_t* gap_end = mmap_ptr + mmap_size;
+#if defined(__LP64__)
   if (gap_size) {
-    gap_end = align_down(mmap_ptr + mmap_size, kGapAlignment);
-    gap_start = gap_end - gap_size;
-  } else {
-    gap_start = gap_end = mmap_ptr + mmap_size;
+    gap_end = __builtin_align_down(gap_end, kGapAlignment);
   }
+#endif
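+  // gap_size == 0 (always the case on 32-bit) leaves gap_start == gap_end, i.e. no gap.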
+  uint8_t* gap_start = gap_end - gap_size;
 
-  uint8_t* first = align_up(mmap_ptr, mapping_align);
-  uint8_t* last = align_down(gap_start, mapping_align) - size;
+  uint8_t* first = __builtin_align_up(mmap_ptr, mapping_align);
+  uint8_t* last = __builtin_align_down(gap_start, mapping_align) - size;
 
   // arc4random* is not available in first stage init because /dev/urandom hasn't yet been
   // created. Don't randomize then.
@@ -1017,7 +1017,7 @@
     ElfW(Addr) seg_start = phdr->p_vaddr + load_bias_;
     ElfW(Addr) seg_end = seg_start + p_memsz;
 
-    ElfW(Addr) seg_page_end = align_up(seg_end, seg_align);
+    ElfW(Addr) seg_page_end = __builtin_align_up(seg_end, seg_align);
 
     ElfW(Addr) seg_file_end = seg_start + p_filesz;
 
@@ -1025,7 +1025,7 @@
     ElfW(Addr) file_start = phdr->p_offset;
     ElfW(Addr) file_end = file_start + p_filesz;
 
-    ElfW(Addr) file_page_start = align_down(file_start, seg_align);
+    ElfW(Addr) file_page_start = __builtin_align_down(file_start, seg_align);
     ElfW(Addr) file_length = file_end - file_page_start;
 
     if (file_size_ <= 0) {
diff --git a/linker/linker_phdr_16kib_compat.cpp b/linker/linker_phdr_16kib_compat.cpp
index bad20ba..d3783cf 100644
--- a/linker/linker_phdr_16kib_compat.cpp
+++ b/linker/linker_phdr_16kib_compat.cpp
@@ -158,7 +158,7 @@
   }
 
   if (!relro_phdr) {
-    *vaddr = align_down(first_rw->p_vaddr, kCompatPageSize);
+    *vaddr = __builtin_align_down(first_rw->p_vaddr, kCompatPageSize);
     return true;
   }
 
@@ -175,7 +175,7 @@
     return false;
   }
 
-  *vaddr = align_up(end, kCompatPageSize);
+  *vaddr = __builtin_align_up(end, kCompatPageSize);
   return true;
 }
 
@@ -227,11 +227,12 @@
   // will lead to overwriting adjacent segments since the ELF's segment(s)
   // are not 16KiB aligned.
 
-  void* start = reinterpret_cast<void*>(align_down(phdr->p_vaddr + load_bias_, kCompatPageSize));
+  void* start =
+      reinterpret_cast<void*>(__builtin_align_down(phdr->p_vaddr + load_bias_, kCompatPageSize));
 
   // The ELF could be being loaded directly from a zipped APK,
   // the zip offset must be added to find the segment offset.
-  const ElfW(Addr) offset = file_offset_ + align_down(phdr->p_offset, kCompatPageSize);
+  const ElfW(Addr) offset = file_offset_ + __builtin_align_down(phdr->p_offset, kCompatPageSize);
 
   CHECK(should_use_16kib_app_compat_);