Microoptimize the strtol() family.
The main change here is to remember that we arm64/x86-64 have flags, and
__builtin_<op>_overflow() lets us look at them. There's a clear saving
for arm64, and x86-64 is better too, though interestingly there the
unsigned case doesn't work out as well as the signed case because both
unsigned __builtin_mul_overflow and unsigned __builtin_add_overflow are
less efficient than the signed multiply and subtract on that
architecture, but the new code still beats the old code even so.
There's a very tiny microoptimization of the hex path that takes
advantage of the fact that conversion to lowercase is a single
instruction on all our architectures when we already know we're dealing
with a letter.
This also merges the signed and unsigned variants of the code. Not
entirely successfully, but the vast majority of the code benefits.
Before (arm64):
```
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_inttypes_strtoimax 44.6 ns 44.3 ns 15807654
BM_inttypes_strtoumax 43.1 ns 42.8 ns 16348848
BM_stdlib_strtol 44.6 ns 44.3 ns 15805384
BM_stdlib_strtol_hex 85.5 ns 85.0 ns 8235487
BM_stdlib_strtoll 44.5 ns 44.2 ns 15833137
BM_stdlib_strtoul 43.1 ns 42.8 ns 16353963
BM_stdlib_strtoul_hex 83.1 ns 82.6 ns 8477732
BM_stdlib_strtoull 43.1 ns 42.8 ns 16353015
```
After (arm64):
```
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_inttypes_strtoimax 37.9 ns 37.6 ns 17657577
BM_inttypes_strtoumax 35.9 ns 35.7 ns 19597727
BM_stdlib_strtol 36.9 ns 36.7 ns 19093037
BM_stdlib_strtol_hex 70.7 ns 70.3 ns 9961626
BM_stdlib_strtoll 36.9 ns 36.7 ns 19093032
BM_stdlib_strtoul 35.9 ns 35.7 ns 19617784
BM_stdlib_strtoul_hex 67.7 ns 67.3 ns 10113521
BM_stdlib_strtoull 35.9 ns 35.7 ns 19621828
```
Test: treehugger
Change-Id: Ibf53b29e34d63ac31520c6d27ef80ff281899d61
diff --git a/libc/bionic/strtol.cpp b/libc/bionic/strtol.cpp
index 05b4b53..def7921 100644
--- a/libc/bionic/strtol.cpp
+++ b/libc/bionic/strtol.cpp
@@ -35,141 +35,88 @@
#include <wchar.h>
template <typename T, T Min, T Max, typename CharT>
-T StrToI(const CharT* nptr, CharT** endptr, int base) {
+__attribute__((always_inline)) T StrToI(const CharT* s, CharT** end_ptr, int base) {
// Ensure that base is between 2 and 36 inclusive, or the special value of 0.
if (base < 0 || base == 1 || base > 36) {
- if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
+ if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(s);
errno = EINVAL;
return 0;
}
// Skip white space and pick up leading +/- sign if any.
- // If base is 0, allow 0x for hex and 0 for octal, else
- // assume decimal; if base is already 16, allow 0x.
- const CharT* s = nptr;
+ const CharT* p = s;
int c;
- do {
- c = *s++;
- } while (isspace(c));
- int neg;
- if (c == '-') {
- neg = 1;
- c = *s++;
- } else {
- neg = 0;
- if (c == '+') c = *s++;
+ while (isspace(c = *p++)) {
}
- if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
- c = s[1];
- s += 2;
+ bool neg = false;
+ if (c == '-') {
+ neg = true;
+ c = *p++;
+ } else if (c == '+') {
+ c = *p++;
+ }
+
+ // If base is 0 or 16, allow "0x" prefix for hex.
+ if ((base == 0 || base == 16) && c == '0' && (*p == 'x' || *p == 'X') && isxdigit(p[1])) {
+ c = p[1];
+ p += 2;
base = 16;
}
- if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
- c = s[1];
- s += 2;
+ // If base is 0 or 2, allow "0b" prefix for binary.
+ if ((base == 0 || base == 2) && c == '0' && (*p == 'b' || *p == 'B') && isdigit(p[1])) {
+ c = p[1];
+ p += 2;
base = 2;
}
+ // If base is 0, allow "0" prefix for octal, otherwise base is 10.
if (base == 0) base = (c == '0') ? 8 : 10;
- // We always work in the negative space because the most negative value has a
- // larger magnitude than the most positive value.
- T cutoff = Min / base;
- int cutlim = -(Min % base);
+ constexpr bool is_signed = (Min != 0);
+ T acc = 0;
// Non-zero if any digits consumed; negative to indicate overflow/underflow.
int any = 0;
- T acc = 0;
- for (; ; c = *s++) {
+ for (;; c = *p++) {
if (isdigit(c)) {
c -= '0';
} else if (isalpha(c)) {
- c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ c = 10 + (_tolower(c) - 'a');
} else {
break;
}
if (c >= base) break;
if (any < 0) continue;
- if (acc < cutoff || (acc == cutoff && c > cutlim)) {
- any = -1;
- acc = Min;
- errno = ERANGE;
+ if (is_signed) {
+ // We work in the negative space because the most negative value has a
+ // larger magnitude than the most positive value.
+ if (__builtin_mul_overflow(acc, base, &acc) || __builtin_sub_overflow(acc, c, &acc)) {
+ any = -1;
+ continue;
+ }
} else {
- any = 1;
- acc *= base;
- acc -= c;
+ if (__builtin_mul_overflow(acc, base, &acc) || __builtin_add_overflow(acc, c, &acc)) {
+ any = -1;
+ continue;
+ }
}
- }
- if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
- if (!neg) {
- if (acc == Min) {
- errno = ERANGE;
- acc = Max;
- } else {
- acc = -acc;
- }
- }
- return acc;
-}
-
-template <typename T, T Max, typename CharT>
-T StrToU(const CharT* nptr, CharT** endptr, int base) {
- if (base < 0 || base == 1 || base > 36) {
- if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
- errno = EINVAL;
- return 0;
+ any = 1;
}
- const CharT* s = nptr;
- int c;
- do {
- c = *s++;
- } while (isspace(c));
- int neg;
- if (c == '-') {
- neg = 1;
- c = *s++;
- } else {
- neg = 0;
- if (c == '+') c = *s++;
- }
- if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
- c = s[1];
- s += 2;
- base = 16;
- }
- if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
- c = s[1];
- s += 2;
- base = 2;
- }
- if (base == 0) base = (c == '0') ? 8 : 10;
+ if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(any ? p - 1 : s);
- T cutoff = Max / static_cast<T>(base);
- int cutlim = Max % static_cast<T>(base);
- T acc = 0;
- int any = 0;
- for (; ; c = *s++) {
- if (isdigit(c)) {
- c -= '0';
- } else if (isalpha(c)) {
- c -= isupper(c) ? 'A' - 10 : 'a' - 10;
- } else {
- break;
- }
- if (c >= base) break;
- if (any < 0) continue;
- if (acc > cutoff || (acc == cutoff && c > cutlim)) {
- any = -1;
- acc = Max;
- errno = ERANGE;
- } else {
- any = 1;
- acc *= base;
- acc += c;
- }
+ // Detected overflow/underflow in the loop?
+ if (any == -1) {
+ errno = ERANGE;
+ return (is_signed && neg) ? Min : Max;
}
- if (neg && any > 0) acc = -acc;
- if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
- return acc;
+
+ // Will we overflow by trying to negate the most negative value?
+ if (any > 0 && is_signed && !neg && acc == Min) {
+ errno = ERANGE;
+ return Max;
+ }
+
+ if (is_signed) return neg ? acc : -acc;
+ return neg ? -acc : acc;
}
int atoi(const char* s) {
@@ -209,25 +156,25 @@
}
unsigned long strtoul(const char* s, char** end, int base) {
- return StrToU<unsigned long, ULONG_MAX, char>(s, end, base);
+ return StrToI<unsigned long, 0, ULONG_MAX, char>(s, end, base);
}
unsigned long wcstoul(const wchar_t* s, wchar_t** end, int base) {
- return StrToU<unsigned long, ULONG_MAX, wchar_t>(s, end, base);
+ return StrToI<unsigned long, 0, ULONG_MAX, wchar_t>(s, end, base);
}
unsigned long long strtoull(const char* s, char** end, int base) {
- return StrToU<unsigned long long, ULLONG_MAX, char>(s, end, base);
+ return StrToI<unsigned long long, 0, ULLONG_MAX, char>(s, end, base);
}
unsigned long long wcstoull(const wchar_t* s, wchar_t** end, int base) {
- return StrToU<unsigned long long, ULLONG_MAX, wchar_t>(s, end, base);
+ return StrToI<unsigned long long, 0, ULLONG_MAX, wchar_t>(s, end, base);
}
uintmax_t strtoumax(const char* s, char** end, int base) {
- return StrToU<uintmax_t, UINTMAX_MAX, char>(s, end, base);
+ return StrToI<uintmax_t, 0, UINTMAX_MAX, char>(s, end, base);
}
uintmax_t wcstoumax(const wchar_t* s, wchar_t** end, int base) {
- return StrToU<uintmax_t, UINTMAX_MAX, wchar_t>(s, end, base);
+ return StrToI<uintmax_t, 0, UINTMAX_MAX, wchar_t>(s, end, base);
}