Add %b and %B support to the scanf/wscanf and strto*/wcsto* families.

Coming to C23 via WG14 N2630.

This one is a little interesting, because it actually changes existing behavior. Previously "0b101" would be parsed as "0", "b", "101" by these functions. I'm led to believe that glibc plans to actually have separate versions of these functions for C23 and pre-C23, so callers can have the behavior they (implicitly) specify by virtue of which -std= they compile with.

Android has never really done anything like that, and I'm pretty sure app developers have more than enough to worry about with API levels without having to deal with the Cartesian product of API level and C standard. Therefore, my plan A is "if you're running on Android >= U, you get C23 behavior". My plan B in the (I think unlikely) event that that actually causes trouble for anyone is "if you're _targeting_ Android >= U, you get C23 behavior".

I don't think we'd actually want to have two versions of each of these functions under any circumstances --- that seems by far the most confusing option.

Test: treehugger
Change-Id: I0bbb30315d3fabd306905ad1484361f5d8745935

commit: 1f462dec34a5358c3e63d6a8e986a82248338aed [log] [tgz]
author: Elliott Hughes <enh@google.com> Fri Aug 05 22:51:05 2022 +0000
committer: Elliott Hughes <enh@google.com> Thu Aug 11 00:25:08 2022 +0000
tree: 2f1534308da43c4e91b3c96e0d9d556efec547bd
parent: cede011a2c7d9d1d3679f05fe0270b2bfea558bc [diff] [blame]
diff --git a/libc/stdio/vfscanf.cpp b/libc/stdio/vfscanf.cpp
index 424c4ef..d05a3a6 100644
--- a/libc/stdio/vfscanf.cpp
+++ b/libc/stdio/vfscanf.cpp

@@ -69,7 +69,8 @@
 #define HAVESIGN   0x04000 // Sign detected
 #define NDIGITS    0x08000 // No digits detected
 #define PFXOK      0x10000 // "0x" prefix is (still) legal
-#define NZDIGITS   0x20000 // No zero digits detected
+#define PFBOK      0x20000 // "0b" prefix is (still) legal
+#define NZDIGITS   0x40000 // No zero digits detected
 
 // Conversion types.
 #define CT_CHAR 0   // %c conversion
@@ -101,9 +102,6 @@
   void* allocation = nullptr; // Allocated but unassigned result for %mc/%ms/%m[.
   size_t capacity = 0; // Number of char/wchar_t units allocated in `allocation`.
 
-  /* `basefix' is used to avoid `if' tests in the integer scanner */
-  static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-
   _SET_ORIENTATION(fp, -1);
 
   nassigned = 0;
@@ -188,6 +186,12 @@
        * Conversions.
        * Those marked `compat' are for 4.[123]BSD compatibility.
        */
+      case 'b':
+        c = CT_INT;
+        base = 2;
+        flags |= PFBOK; /* enable 0b prefixing */
+        break;
+
       case 'D': /* compat */
         flags |= LONG;
         __BIONIC_FALLTHROUGH;
@@ -558,7 +562,7 @@
              * digits (zero or nonzero) have been
              * scanned (only signs), we will have
              * base==0.  In that case, we should set
-             * it to 8 and enable 0x prefixing.
+             * it to 8 and enable 0b/0x prefixing.
              * Also, if we have not scanned zero digits
              * before this, do not turn off prefixing
              * (someone else will turn it off if we
@@ -567,15 +571,24 @@
             case '0':
               if (base == 0) {
                 base = 8;
-                flags |= PFXOK;
+                flags |= PFBOK | PFXOK;
               }
-              if (flags & NZDIGITS)
+              if (flags & NZDIGITS) {
                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
-              else
-                flags &= ~(SIGNOK | PFXOK | NDIGITS);
+              } else {
+                flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
+              }
               goto ok;
-
-            /* 1 through 7 always legal */
+            case 'B':
+            case 'b':
+              // Is this 'b' or 'B' potentially part of an "0b" prefix?
+              if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
+                base = 2;
+                flags &= ~PFBOK;
+                goto ok;
+              }
+              // No? Fall through and see if it's a hex digit instead then...
+              __BIONIC_FALLTHROUGH;
             case '1':
             case '2':
             case '3':
@@ -583,34 +596,21 @@
             case '5':
             case '6':
             case '7':
-              base = basefix[base];
-              flags &= ~(SIGNOK | PFXOK | NDIGITS);
-              goto ok;
-
-            /* digits 8 and 9 ok iff decimal or hex */
             case '8':
             case '9':
-              base = basefix[base];
-              if (base <= 8) break; /* not legal here */
-              flags &= ~(SIGNOK | PFXOK | NDIGITS);
-              goto ok;
-
-            /* letters ok iff hex */
             case 'A':
-            case 'B':
             case 'C':
             case 'D':
             case 'E':
             case 'F':
             case 'a':
-            case 'b':
             case 'c':
             case 'd':
             case 'e':
             case 'f':
-              /* no need to fix base here */
-              if (base <= 10) break; /* not legal here */
-              flags &= ~(SIGNOK | PFXOK | NDIGITS);
+              if (base == 0) base = 10;
+              if (base != 16 && (c - '0') >= base) break; /* not legal here */
+              flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
               goto ok;
 
             /* sign ok only as first character */
@@ -653,17 +653,16 @@
             break; /* EOF */
         }
         /*
-         * If we had only a sign, it is no good; push
-         * back the sign.  If the number ends in `x',
-         * it was [sign] '0' 'x', so push back the x
-         * and treat it as [sign] '0'.
+         * If we had only a sign, it is no good; push back the sign.
+         * If the number was `[-+]0[BbXx]`, push back and treat it
+         * as `[-+]0`.
          */
         if (flags & NDIGITS) {
           if (p > buf) (void)ungetc(*(u_char*)--p, fp);
           goto match_failure;
         }
         c = ((u_char*)p)[-1];
-        if (c == 'x' || c == 'X') {
+        if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
           --p;
           (void)ungetc(c, fp);
         }
commit	1f462dec34a5358c3e63d6a8e986a82248338aed	[log] [tgz]
author	Elliott Hughes <enh@google.com>	Fri Aug 05 22:51:05 2022 +0000
committer	Elliott Hughes <enh@google.com>	Thu Aug 11 00:25:08 2022 +0000
tree	2f1534308da43c4e91b3c96e0d9d556efec547bd
parent	cede011a2c7d9d1d3679f05fe0270b2bfea558bc [diff] [blame]