Merge "Spell "calling convention" out in full." into main
diff --git a/libc/bionic/bionic_allocator.cpp b/libc/bionic/bionic_allocator.cpp
index 80e8b08..41baf8b 100644
--- a/libc/bionic/bionic_allocator.cpp
+++ b/libc/bionic/bionic_allocator.cpp
@@ -299,7 +299,7 @@
     log2_size = kSmallObjectMinSizeLog2;
   }
 
-  return get_small_object_allocator(log2_size)->alloc();
+  return get_small_object_allocator_unchecked(log2_size)->alloc();
 }
 
 void* BionicAllocator::alloc(size_t size) {
@@ -330,9 +330,10 @@
 inline page_info* BionicAllocator::get_page_info(void* ptr) {
   page_info* info = get_page_info_unchecked(ptr);
   if (memcmp(info->signature, kSignature, sizeof(kSignature)) != 0) {
-    async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
+    async_safe_fatal("invalid pointer %p (page signature %08x instead of %08x)", ptr,
+                     *reinterpret_cast<const unsigned*>(info->signature),
+                     *reinterpret_cast<const unsigned*>(kSignature));  // Pad to a full word.
   }
-
   return info;
 }
 
@@ -353,12 +354,7 @@
   if (info->type == kLargeObject) {
     old_size = info->allocated_size - (static_cast<char*>(ptr) - reinterpret_cast<char*>(info));
   } else {
-    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
-    if (allocator != info->allocator_addr) {
-      async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
-    }
-
-    old_size = allocator->get_block_size();
+    old_size = get_small_object_allocator(info, ptr)->get_block_size();
   }
 
   if (old_size < size) {
@@ -377,16 +373,10 @@
   }
 
   page_info* info = get_page_info(ptr);
-
   if (info->type == kLargeObject) {
     munmap(info, info->allocated_size);
   } else {
-    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
-    if (allocator != info->allocator_addr) {
-      async_safe_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
-    }
-
-    allocator->free(ptr);
+    get_small_object_allocator(info, ptr)->free(ptr);
   }
 }
 
@@ -402,7 +392,7 @@
     return info->allocated_size - (static_cast<char*>(ptr) - reinterpret_cast<char*>(info));
   }
 
-  BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+  BionicSmallObjectAllocator* allocator = get_small_object_allocator_unchecked(info->type);
   if (allocator != info->allocator_addr) {
     // Invalid pointer.
     return 0;
@@ -410,7 +400,7 @@
   return allocator->get_block_size();
 }
 
-BionicSmallObjectAllocator* BionicAllocator::get_small_object_allocator(uint32_t type) {
+BionicSmallObjectAllocator* BionicAllocator::get_small_object_allocator_unchecked(uint32_t type) {
   if (type < kSmallObjectMinSizeLog2 || type > kSmallObjectMaxSizeLog2) {
     async_safe_fatal("invalid type: %u", type);
   }
@@ -418,3 +408,11 @@
   initialize_allocators();
   return &allocators_[type - kSmallObjectMinSizeLog2];
 }
+
+BionicSmallObjectAllocator* BionicAllocator::get_small_object_allocator(page_info* pi, void* ptr) {
+  BionicSmallObjectAllocator* result = get_small_object_allocator_unchecked(pi->type);
+  if (result != pi->allocator_addr) {  // The page must name the allocator for its own type.
+    async_safe_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
+  }
+  return result;
+}
diff --git a/libc/bionic/heap_tagging.cpp b/libc/bionic/heap_tagging.cpp
index c8a025f..cadab3c 100644
--- a/libc/bionic/heap_tagging.cpp
+++ b/libc/bionic/heap_tagging.cpp
@@ -34,6 +34,7 @@
 #include <platform/bionic/malloc.h>
 #include <sanitizer/hwasan_interface.h>
 #include <sys/auxv.h>
+#include <sys/prctl.h>
 
 extern "C" void scudo_malloc_disable_memory_tagging();
 extern "C" void scudo_malloc_set_track_allocation_stacks(int);
diff --git a/libc/bionic/malloc_limit.cpp b/libc/bionic/malloc_limit.cpp
index deb63f4..5128a35 100644
--- a/libc/bionic/malloc_limit.cpp
+++ b/libc/bionic/malloc_limit.cpp
@@ -31,6 +31,7 @@
 #include <stdatomic.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <unistd.h>
 
 #include <private/bionic_malloc_dispatch.h>
 
diff --git a/libc/bionic/wcwidth.cpp b/libc/bionic/wcwidth.cpp
index 9676b5a..4582ef7 100644
--- a/libc/bionic/wcwidth.cpp
+++ b/libc/bionic/wcwidth.cpp
@@ -52,12 +52,15 @@
     return -1;
    case U_NON_SPACING_MARK:
    case U_ENCLOSING_MARK:
-   case U_FORMAT_CHAR:
     return 0;
+   case U_FORMAT_CHAR:
+    // A special case for soft hyphen (U+00AD) to match historical practice.
+    // See the tests for more commentary.
+    return (wc == 0x00ad) ? 1 : 0;
   }
-  if (__icu_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, nullptr)) return 0;
 
-  // Medial and final jamo render as zero width when used correctly.
+  // Medial and final jamo render as zero width when used correctly,
+  // so we handle them specially rather than relying on East Asian Width.
   switch (__icu_getIntPropertyValue(wc, UCHAR_HANGUL_SYLLABLE_TYPE)) {
    case U_HST_VOWEL_JAMO:
    case U_HST_TRAILING_JAMO:
@@ -68,6 +71,11 @@
     return 2;
   }
 
+  // Hangeul choseong filler U+115F is default ignorable, so we check default
+  // ignorability only after we've already handled Hangeul jamo above.
+  if (__icu_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, nullptr)) return 0;
+
+  // A few weird special cases where EastAsianWidth is not helpful for us.
   if (wc >= 0x3248 && wc <= 0x4dff) {
     // Circled two-digit CJK "speed sign" numbers. EastAsianWidth is ambiguous,
     // but wide makes more sense.
@@ -77,6 +85,7 @@
   }
 
   // The EastAsianWidth property is at least defined by the Unicode standard!
+  // https://www.unicode.org/reports/tr11/
   switch (__icu_getIntPropertyValue(wc, UCHAR_EAST_ASIAN_WIDTH)) {
    case U_EA_AMBIGUOUS:
    case U_EA_HALFWIDTH:
diff --git a/libc/private/bionic_allocator.h b/libc/private/bionic_allocator.h
index 342fd51..9872669 100644
--- a/libc/private/bionic_allocator.h
+++ b/libc/private/bionic_allocator.h
@@ -28,13 +28,9 @@
 
 #pragma once
 
-#include <errno.h>
-#include <stdlib.h>
 #include <sys/cdefs.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
 #include <stddef.h>
-#include <unistd.h>
+#include <stdint.h>
 
 const uint32_t kSmallObjectMaxSizeLog2 = 10;
 const uint32_t kSmallObjectMinSizeLog2 = 4;
@@ -120,7 +116,8 @@
   inline void* alloc_impl(size_t align, size_t size);
   inline page_info* get_page_info_unchecked(void* ptr);
   inline page_info* get_page_info(void* ptr);
-  BionicSmallObjectAllocator* get_small_object_allocator(uint32_t type);
+  BionicSmallObjectAllocator* get_small_object_allocator_unchecked(uint32_t type);
+  BionicSmallObjectAllocator* get_small_object_allocator(page_info* pi, void* ptr);
   void initialize_allocators();
 
   BionicSmallObjectAllocator* allocators_;
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index e27fd91..2b230a8 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -31,6 +31,7 @@
 #include <link.h>
 #include <stdlib.h>
 #include <sys/auxv.h>
+#include <sys/prctl.h>
 
 #include "linker.h"
 #include "linker_auxv.h"
diff --git a/tests/wchar_test.cpp b/tests/wchar_test.cpp
index 5256b08..387d23b 100644
--- a/tests/wchar_test.cpp
+++ b/tests/wchar_test.cpp
@@ -1075,10 +1075,39 @@
 
   EXPECT_EQ(0, wcwidth(0x0300)); // Combining grave.
   EXPECT_EQ(0, wcwidth(0x20dd)); // Combining enclosing circle.
-  EXPECT_EQ(0, wcwidth(0x00ad)); // Soft hyphen (SHY).
   EXPECT_EQ(0, wcwidth(0x200b)); // Zero width space.
 }
 
+TEST(wchar, wcwidth_non_spacing_special_cases) {
+  if (!have_dl()) return;
+
+  // U+00AD is a soft hyphen, which normally shouldn't be rendered at all.
+  // I think the assumption here is that you elide the soft hyphen character
+  // completely in that case, and never call wcwidth() if you don't want to
+  // render it as an actual hyphen. Whereas if you do want to render it,
+  // you call wcwidth(), and 1 is the right answer. This is what Markus Kuhn's
+  // original https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c did,
+  // and glibc and iOS do the same.
+  // See also: https://en.wikipedia.org/wiki/Soft_hyphen#Text_to_be_formatted_by_the_recipient
+  EXPECT_EQ(1, wcwidth(0x00ad)); // Soft hyphen (SHY).
+
+  // U+115F is the Hangeul choseong filler (for a degenerate composed
+  // character missing an initial consonant, as opposed to one with a
+  // leading ieung). Since the code points for combining jungseong (medial
+  // vowels) and jongseong (trailing consonants) have width 0, the choseong
+  // (initial consonant) has width 2 to cover the entire syllable. So unless
+  // U+115F has width 2, a degenerate composed "syllable" without an initial
+  // consonant or ieung would have a total width of 0, which is silly.
+  // The following sequence is effectively "약" without the leading ieung...
+  EXPECT_EQ(2, wcwidth(0x115f)); // Hangeul choseong filler.
+  EXPECT_EQ(0, wcwidth(0x1163)); // Hangeul jungseong "ya".
+  EXPECT_EQ(0, wcwidth(0x11a8)); // Hangeul jongseong "kiyeok".
+
+  // U+1160, the jungseong filler, has width 0 because it must have been
+  // preceded by either a choseong or choseong filler.
+  EXPECT_EQ(0, wcwidth(0x1160)); // Hangeul jungseong filler.
+}
+
 TEST(wchar, wcwidth_cjk) {
   if (!have_dl()) return;
 
@@ -1102,8 +1131,10 @@
   if (!have_dl()) return;
 
   EXPECT_EQ(2, wcwidth(0xac00)); // Start of block.
-  EXPECT_EQ(2, wcwidth(0xd7a3)); // End of defined code points in Unicode 7.
-  // Undefined characters at the end of the block have width 1.
+  EXPECT_EQ(2, wcwidth(0xd7a3)); // End of defined code points as of Unicode 15.
+
+  // Undefined characters at the end of the block currently have width 1,
+  // but since they're undefined, we don't test that.
 }
 
 TEST(wchar, wcwidth_kana) {
@@ -1137,11 +1168,21 @@
   EXPECT_EQ(0, wcwidth(0xe0000)); // ...through 0xe0fff.
 }
 
-TEST(wchar, wcwidth_korean_common_non_syllables) {
+TEST(wchar, wcwidth_hangeul_compatibility_jamo) {
   if (!have_dl()) return;
 
-  EXPECT_EQ(2, wcwidth(L'ㅜ')); // Korean "crying" emoticon.
-  EXPECT_EQ(2, wcwidth(L'ㅋ')); // Korean "laughing" emoticon.
+  // These are actually the *compatibility* jamo code points, *not* the regular
+  // jamo code points (U+1100-U+11FF) using a jungseong filler. If you use the
+  // Android IME to type any of these, you get these code points.
+
+  // (Half of) the Korean "crying" emoticon "ㅠㅠ".
+  // Actually U+3160 "Hangeul Letter Yu" from Hangeul Compatibility Jamo.
+  EXPECT_EQ(2, wcwidth(L'ㅠ'));
+  // The two halves of the Korean internet shorthand "ㄱㅅ" (short for 감사).
+  // Actually U+3131 "Hangeul Letter Kiyeok" and U+3145 "Hangeul Letter Sios"
+  // from Hangeul Compatibility Jamo.
+  EXPECT_EQ(2, wcwidth(L'ㄱ'));
+  EXPECT_EQ(2, wcwidth(L'ㅅ'));
 }
 
 TEST(wchar, wcswidth) {