Sync with upstream NetBSD. Test: treehugger Change-Id: Ia82ba4a0c00a47377240e5d9a85dc30deb5ce3b7

commit: 88e4e60a75841db476b765742ea61fc0403abbed [log] [tgz]
author: Elliott Hughes <enh@google.com> Tue Nov 08 02:51:00 2022 +0000
committer: Elliott Hughes <enh@google.com> Tue Nov 08 02:51:48 2022 +0000
tree: 2c9b6b03fc32cd32ee9f0d164c7e2124d33159ef
parent: 09db6416bc061baf686fba2f50de7e96cb262785 [diff]
diff --git a/libc/upstream-netbsd/android/include/netbsd-compat.h b/libc/upstream-netbsd/android/include/netbsd-compat.h
index 5dd086e..a625f06 100644
--- a/libc/upstream-netbsd/android/include/netbsd-compat.h
+++ b/libc/upstream-netbsd/android/include/netbsd-compat.h

@@ -43,6 +43,8 @@
 #include <stddef.h>
 int reallocarr(void*, size_t, size_t);
 
+#define __arraycount(a) (sizeof(a) / sizeof((a)[0])) /* element count; true arrays only, not pointers */
+
 /* Use appropriate shell depending on process's executable. */
 __LIBC_HIDDEN__ extern const char* __bionic_get_shell_path();
 #define _PATH_BSHELL __bionic_get_shell_path()

diff --git a/libc/upstream-netbsd/lib/libc/include/isc/list.h b/libc/upstream-netbsd/lib/libc/include/isc/list.h
index 46f2e79..76dc097 100644
--- a/libc/upstream-netbsd/lib/libc/include/isc/list.h
+++ b/libc/upstream-netbsd/lib/libc/include/isc/list.h

@@ -1,4 +1,4 @@
-/*	$NetBSD: list.h,v 1.5 2009/04/12 17:07:16 christos Exp $	*/
+/*	$NetBSD: list.h,v 1.6 2022/04/19 20:32:15 rillig Exp $	*/
 
 /*
  * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
@@ -23,14 +23,14 @@
 
 #define LIST(type) struct { type *head, *tail; }
 #define INIT_LIST(list) \
-	do { (list).head = NULL; (list).tail = NULL; } while (/*CONSTCOND*/0)
+	do { (list).head = NULL; (list).tail = NULL; } while (0)
 
 #define LINK(type) struct { type *prev, *next; }
 #define INIT_LINK_TYPE(elt, link, type) \
 	do { \
 		(elt)->link.prev = (type *)(-1); \
 		(elt)->link.next = (type *)(-1); \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 #define INIT_LINK(elt, link) \
 	INIT_LINK_TYPE(elt, link, void)
 #define LINKED(elt, link) ((void *)((elt)->link.prev) != (void *)(-1) && \
@@ -50,7 +50,7 @@
 		(elt)->link.prev = NULL; \
 		(elt)->link.next = (list).head; \
 		(list).head = (elt); \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 
 #define APPEND(list, elt, link) \
 	do { \
@@ -62,7 +62,7 @@
 		(elt)->link.prev = (list).tail; \
 		(elt)->link.next = NULL; \
 		(list).tail = (elt); \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 
 #define UNLINK_TYPE(list, elt, link, type) \
 	do { \
@@ -80,7 +80,7 @@
 			(list).head = (elt)->link.next; \
 		} \
 		INIT_LINK_TYPE(elt, link, type); \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 #define UNLINK(list, elt, link) \
 	UNLINK_TYPE(list, elt, link, void)
 
@@ -98,7 +98,7 @@
 			(elt)->link.prev->link.next = (elt); \
 			(elt)->link.next = (before); \
 		} \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 
 #define INSERT_AFTER(list, after, elt, link) \
 	do { \
@@ -111,7 +111,7 @@
 			(elt)->link.next->link.prev = (elt); \
 			(elt)->link.prev = (after); \
 		} \
-	} while (/*CONSTCOND*/0)
+	} while (0)
 
 #define ENQUEUE(list, elt, link) APPEND(list, elt, link)
 #define DEQUEUE(list, elt, link) UNLINK(list, elt, link)

diff --git a/libc/upstream-netbsd/lib/libc/regex/cclass.h b/libc/upstream-netbsd/lib/libc/regex/cclass.h
deleted file mode 100644
index 3ab2ccb..0000000
--- a/libc/upstream-netbsd/lib/libc/regex/cclass.h
+++ /dev/null

@@ -1,104 +0,0 @@
-/*	$NetBSD: cclass.h,v 1.7 2003/08/07 16:43:19 agc Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
- */
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
- */
-
-/* character-class table */
-static const struct cclass {
-	const char *name;
-	const char *chars;
-	const char *multis;
-} cclasses[] = {
-	{ "alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",				"" },
-	{ "alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-					"" },
-	{ "blank",	" \t",		"" },
-	{ "cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",	"" },
-	{ "digit",	"0123456789",	"" },
-	{ "graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					"" },
-	{ "lower",	"abcdefghijklmnopqrstuvwxyz",
-					"" },
-	{ "print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-					"" },
-	{ "punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					"" },
-	{ "space",	"\t\n\v\f\r ",	"" },
-	{ "upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-					"" },
-	{ "xdigit",	"0123456789ABCDEFabcdef",
-					"" },
-	{ NULL,		0,		"" }
-};

diff --git a/libc/upstream-netbsd/lib/libc/regex/cname.h b/libc/upstream-netbsd/lib/libc/regex/cname.h
index 4b9ef39..47e57ac 100644
--- a/libc/upstream-netbsd/lib/libc/regex/cname.h
+++ b/libc/upstream-netbsd/lib/libc/regex/cname.h

@@ -1,6 +1,9 @@
-/*	$NetBSD: cname.h,v 1.7 2003/08/07 16:43:19 agc Exp $	*/
+/*	$NetBSD: cname.h,v 1.8 2021/02/23 22:14:59 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,144 +35,108 @@
  * SUCH DAMAGE.
  *
  *	@(#)cname.h	8.3 (Berkeley) 3/20/94
- */
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)cname.h	8.3 (Berkeley) 3/20/94
+ * $FreeBSD: head/lib/libc/regex/cname.h 326025 2017-11-20 19:49:47Z pfg $
  */
 
 /* character-name table */
-static const struct cname {
+static struct cname {
 	const char *name;
 	char code;
 } cnames[] = {
-	{ "NUL",			'\0' },
-	{ "SOH",			'\001' },
-	{ "STX",			'\002' },
-	{ "ETX",			'\003' },
-	{ "EOT",			'\004' },
-	{ "ENQ",			'\005' },
-	{ "ACK",			'\006' },
-	{ "BEL",			'\007' },
-	{ "alert",			'\007' },
-	{ "BS",				'\010' },
-	{ "backspace",			'\b' },
-	{ "HT",				'\011' },
-	{ "tab",			'\t' },
-	{ "LF",				'\012' },
-	{ "newline",			'\n' },
-	{ "VT",				'\013' },
-	{ "vertical-tab",		'\v' },
-	{ "FF",				'\014' },
-	{ "form-feed",			'\f' },
-	{ "CR",				'\015' },
-	{ "carriage-return",		'\r' },
-	{ "SO",				'\016' },
-	{ "SI",				'\017' },
-	{ "DLE",			'\020' },
-	{ "DC1",			'\021' },
-	{ "DC2",			'\022' },
-	{ "DC3",			'\023' },
-	{ "DC4",			'\024' },
-	{ "NAK",			'\025' },
-	{ "SYN",			'\026' },
-	{ "ETB",			'\027' },
-	{ "CAN",			'\030' },
-	{ "EM",				'\031' },
-	{ "SUB",			'\032' },
-	{ "ESC",			'\033' },
-	{ "IS4",			'\034' },
-	{ "FS",				'\034' },
-	{ "IS3",			'\035' },
-	{ "GS",				'\035' },
-	{ "IS2",			'\036' },
-	{ "RS",				'\036' },
-	{ "IS1",			'\037' },
-	{ "US",				'\037' },
-	{ "space",			' ' },
-	{ "exclamation-mark",		'!' },
-	{ "quotation-mark",		'"' },
-	{ "number-sign",		'#' },
-	{ "dollar-sign",		'$' },
-	{ "percent-sign",		'%' },
-	{ "ampersand",			'&' },
-	{ "apostrophe",			'\'' },
-	{ "left-parenthesis",		'(' },
-	{ "right-parenthesis",		')' },
-	{ "asterisk",			'*' },
-	{ "plus-sign",			'+' },
-	{ "comma",			',' },
-	{ "hyphen",			'-' },
-	{ "hyphen-minus",		'-' },
-	{ "period",			'.' },
-	{ "full-stop",			'.' },
-	{ "slash",			'/' },
-	{ "solidus",			'/' },
-	{ "zero",			'0' },
-	{ "one",			'1' },
-	{ "two",			'2' },
-	{ "three",			'3' },
-	{ "four",			'4' },
-	{ "five",			'5' },
-	{ "six",			'6' },
-	{ "seven",			'7' },
-	{ "eight",			'8' },
-	{ "nine",			'9' },
-	{ "colon",			':' },
-	{ "semicolon",			';' },
-	{ "less-than-sign",		'<' },
-	{ "equals-sign",		'=' },
-	{ "greater-than-sign",		'>' },
-	{ "question-mark",		'?' },
-	{ "commercial-at",		'@' },
-	{ "left-square-bracket",	'[' },
-	{ "backslash",			'\\' },
-	{ "reverse-solidus",		'\\' },
-	{ "right-square-bracket",	']' },
-	{ "circumflex",			'^' },
-	{ "circumflex-accent",		'^' },
-	{ "underscore",			'_' },
-	{ "low-line",			'_' },
-	{ "grave-accent",		'`' },
-	{ "left-brace",			'{' },
-	{ "left-curly-bracket",		'{' },
-	{ "vertical-line",		'|' },
-	{ "right-brace",		'}' },
-	{ "right-curly-bracket",	'}' },
-	{ "tilde",			'~' },
-	{ "DEL",			'\177' },
-	{ NULL,				0 },
+	{"NUL",			'\0'},
+	{"SOH",			'\001'},
+	{"STX",			'\002'},
+	{"ETX",			'\003'},
+	{"EOT",			'\004'},
+	{"ENQ",			'\005'},
+	{"ACK",			'\006'},
+	{"BEL",			'\007'},
+	{"alert",		'\007'},
+	{"BS",			'\010'},
+	{"backspace",		'\b'},
+	{"HT",			'\011'},
+	{"tab",			'\t'},
+	{"LF",			'\012'},
+	{"newline",		'\n'},
+	{"VT",			'\013'},
+	{"vertical-tab",	'\v'},
+	{"FF",			'\014'},
+	{"form-feed",		'\f'},
+	{"CR",			'\015'},
+	{"carriage-return",	'\r'},
+	{"SO",			'\016'},
+	{"SI",			'\017'},
+	{"DLE",			'\020'},
+	{"DC1",			'\021'},
+	{"DC2",			'\022'},
+	{"DC3",			'\023'},
+	{"DC4",			'\024'},
+	{"NAK",			'\025'},
+	{"SYN",			'\026'},
+	{"ETB",			'\027'},
+	{"CAN",			'\030'},
+	{"EM",			'\031'},
+	{"SUB",			'\032'},
+	{"ESC",			'\033'},
+	{"IS4",			'\034'},
+	{"FS",			'\034'},
+	{"IS3",			'\035'},
+	{"GS",			'\035'},
+	{"IS2",			'\036'},
+	{"RS",			'\036'},
+	{"IS1",			'\037'},
+	{"US",			'\037'},
+	{"space",		' '},
+	{"exclamation-mark",	'!'},
+	{"quotation-mark",	'"'},
+	{"number-sign",		'#'},
+	{"dollar-sign",		'$'},
+	{"percent-sign",	'%'},
+	{"ampersand",		'&'},
+	{"apostrophe",		'\''},
+	{"left-parenthesis",	'('},
+	{"right-parenthesis",	')'},
+	{"asterisk",		'*'},
+	{"plus-sign",		'+'},
+	{"comma",		','},
+	{"hyphen",		'-'},
+	{"hyphen-minus",	'-'},
+	{"period",		'.'},
+	{"full-stop",		'.'},
+	{"slash",		'/'},
+	{"solidus",		'/'},
+	{"zero",		'0'},
+	{"one",			'1'},
+	{"two",			'2'},
+	{"three",		'3'},
+	{"four",		'4'},
+	{"five",		'5'},
+	{"six",			'6'},
+	{"seven",      		'7'},
+	{"eight",		'8'},
+	{"nine",		'9'},
+	{"colon",		':'},
+	{"semicolon",		';'},
+	{"less-than-sign",	'<'},
+	{"equals-sign",		'='},
+	{"greater-than-sign",	'>'},
+	{"question-mark",	'?'},
+	{"commercial-at",	'@'},
+	{"left-square-bracket",	'['},
+	{"backslash",		'\\'},
+	{"reverse-solidus",	'\\'},
+	{"right-square-bracket",']'},
+	{"circumflex",		'^'},
+	{"circumflex-accent",	'^'},
+	{"underscore",		'_'},
+	{"low-line",		'_'},
+	{"grave-accent",	'`'},
+	{"left-brace",		'{'},
+	{"left-curly-bracket",	'{'},
+	{"vertical-line",	'|'},
+	{"right-brace",		'}'},
+	{"right-curly-bracket",	'}'},
+	{"tilde",		'~'},
+	{"DEL",	'\177'},
+	{NULL,	0}
 };

diff --git a/libc/upstream-netbsd/lib/libc/regex/engine.c b/libc/upstream-netbsd/lib/libc/regex/engine.c
index 2a800d4..ca8b24d 100644
--- a/libc/upstream-netbsd/lib/libc/regex/engine.c
+++ b/libc/upstream-netbsd/lib/libc/regex/engine.c

@@ -1,6 +1,9 @@
-/*	$NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $	*/
+/* $NetBSD: engine.c,v 1.29 2021/02/25 21:47:46 christos Exp $ */
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -34,42 +37,13 @@
  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)engine.c	8.5 (Berkeley) 3/20/94
- */
+#include <sys/cdefs.h>
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 368358 2020-12-05 03:16:05Z kevans $");
+#endif
+__RCSID("$NetBSD: engine.c,v 1.29 2021/02/25 21:47:46 christos Exp $");
+
+#include <stdbool.h>
 
 /*
  * The matching engine and friends.  This file is #included by regexec.c
@@ -79,28 +53,37 @@
  */
 
 #ifdef SNAMES
+#define	stepback sstepback
 #define	matcher	smatcher
-#define	fast	sfast
-#define	slow	sslow
+#define	walk	swalk
 #define	dissect	sdissect
 #define	backref	sbackref
 #define	step	sstep
 #define	print	sprint
 #define	at	sat
 #define	match	smat
-#define	nope	snope
 #endif
 #ifdef LNAMES
+#define	stepback lstepback
 #define	matcher	lmatcher
-#define	fast	lfast
-#define	slow	lslow
+#define	walk	lwalk
 #define	dissect	ldissect
 #define	backref	lbackref
 #define	step	lstep
 #define	print	lprint
 #define	at	lat
 #define	match	lmat
-#define	nope	lnope
+#endif
+#ifdef MNAMES
+#define	stepback mstepback
+#define	matcher	mmatcher
+#define	walk	mwalk
+#define	dissect	mdissect
+#define	backref	mbackref
+#define	step	mstep
+#define	print	mprint
+#define	at	mat
+#define	match	mmat
 #endif
 
 /* another structure passed up and down to avoid zillions of parameters */
@@ -118,6 +101,7 @@
 	states fresh;		/* states for a fresh start */
 	states tmp;		/* temporary */
 	states empty;		/* empty set of states */
+	mbstate_t mbs;		/* multibyte conversion state */
 };
 
 /* ========= begin header generated by ./mkh ========= */
@@ -128,27 +112,31 @@
 /* === engine.c === */
 static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
-static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
-#define	BOL	(OUT+1)
-#define	EOL	(BOL+1)
-#define	BOLEOL	(BOL+2)
-#define	NOTHING	(BOL+3)
-#define	BOW	(BOL+4)
-#define	EOW	(BOL+5)
-#define	CODEMAX	(BOL+5)		/* highest code used */
-#define	NONCHAR(c)	((c) > CHAR_MAX)
-#define	NNONCHAR	(CODEMAX-CHAR_MAX)
+static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
+static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
+#define MAX_RECURSION	100
+#define	BOL	(OUT-1)
+#define	EOL	(BOL-1)
+#define	BOLEOL	(BOL-2)
+#define	NOTHING	(BOL-3)
+#define	BOW	(BOL-4)
+#define	EOW	(BOL-5)
+#define	BADCHAR	(BOL-6)
+#define	NWBND	(BOL-7)
+#define	NONCHAR(c)	((c) <= OUT)
+/* sflags */
+#define	SBOS	0x0001
+#define	SEOS	0x0002
+
 #ifdef REDEBUG
-static void print(struct match *m, char *caption, states st, int ch, FILE *d);
+static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
 #endif
 #ifdef REDEBUG
-static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
+static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
 #endif
 #ifdef REDEBUG
-static char *pchar(int ch);
+static const char *pchar(int ch);
 #endif
 
 #ifdef __cplusplus
@@ -160,7 +148,6 @@
 #define	SP(t, s, c)	print(m, t, s, c, stdout)
 #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
 #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
-static int nope = 0;
 #else
 #define	SP(t, s, c)	/* nothing */
 #define	AT(t, p1, p2, s1, s2)	/* nothing */
@@ -168,27 +155,70 @@
 #endif
 
 /*
+ * Step back nchar characters from cur in the (possibly multibyte) string that
+ * begins at start.  Returns NULL when the caller should keep using start.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+#ifdef NLS
+	const char *ret;
+	size_t wc, mbc;
+	mbstate_t mbs;
+	size_t clen;
+
+	if (MB_CUR_MAX == 1)
+		goto out;
+
+	ret = cur;
+	for (wc = nchar; wc > 0; wc--) {
+		for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
+			if ((ret - start) < (int)mbc)	/* would pass start */
+				return (NULL);
+			memset(&mbs, 0, sizeof(mbs));
+			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+			if (clen != (size_t)-1 && clen != (size_t)-2)
+				break;
+		}
+		if (mbc > MB_CUR_MAX)
+			return (NULL);
+		ret -= mbc;
+	}
+
+	return (ret);
+out:
+#endif
+	return (cur - start) > nchar ? cur - nchar : NULL; /* compare distances: never form a pointer before start */
+}
+
+/*
  - matcher - the actual matching engine
- == static int matcher(struct re_guts *g, char *string, \
+ == static int matcher(struct re_guts *g, const char *string, \
  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
  */
 static int			/* 0 success, REG_NOMATCH failure */
-matcher(
-    struct re_guts *g,
-    const char *string,
-    size_t nmatch,
-    regmatch_t pmatch[],
-    int eflags)
+matcher(struct re_guts *g,
+	const char *string,
+	size_t nmatch,
+	regmatch_t pmatch[],
+	int eflags)
 {
 	const char *endp;
 	size_t i;
 	struct match mv;
 	struct match *m = &mv;
-	const char *dp;
+	const char *dp = NULL;
 	const sopno gf = g->firststate+1;	/* +1 for OEND */
 	const sopno gl = g->laststate;
 	const char *start;
 	const char *stop;
+	/* Boyer-Moore algorithm variables */
+	const char *pp;
+	size_t cj, mj;
+	const char *mustfirst;
+	const char *mustlast;
+	size_t *matchjump;
+	size_t *charjump;
 	int error = 0;
 
 	_DIAGASSERT(g != NULL);
@@ -211,12 +241,46 @@
 
 	/* prescreening; this does wonders for this rather slow code */
 	if (g->must != NULL) {
-		for (dp = start; dp < stop; dp++)
-			if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
-				memcmp(dp, g->must, g->mlen) == 0)
-				break;
-		if (dp == stop)		/* we didn't find g->must */
-			return(REG_NOMATCH);
+		if (g->charjump != NULL && g->matchjump != NULL) {
+			mustfirst = g->must;
+			mustlast = g->must + g->mlen - 1;
+			charjump = g->charjump;
+			matchjump = g->matchjump;
+			pp = mustlast;
+			for (dp = start+g->mlen-1; dp < stop;) {
+				/* Fast skip non-matches */
+				while (dp < stop && charjump[(int)*dp])
+					dp += charjump[(int)*dp];
+
+				if (dp >= stop)
+					break;
+
+				/* Greedy matcher */
+				/* We depend on not being used
+				 * for strings of length 1
+				 */
+				while (*--dp == *--pp && pp != mustfirst);
+
+				if (*dp == *pp)
+					break;
+
+				/* Jump to next possible match */
+				mj = matchjump[pp - mustfirst];
+				cj = charjump[(int)*dp];
+				dp += (cj < mj ? mj : cj);
+				pp = mustlast;
+			}
+			if (pp != mustfirst)
+				return(REG_NOMATCH);
+		} else {
+			for (dp = start; dp < stop; dp++)
+				if (*dp == g->must[0] &&
+				    (size_t)(stop - dp) >= g->mlen &&
+				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
+					break;
+			if (dp == stop)		/* we didn't find g->must */
+				return(REG_NOMATCH);
+		}
 	}
 
 	/* match struct setup */
@@ -233,10 +297,22 @@
 	SETUP(m->tmp);
 	SETUP(m->empty);
 	CLEAR(m->empty);
+	ZAPSTATE(&m->mbs);
+
+	/* Adjust start according to moffset, to speed things up */
+	if (dp != NULL && g->moffset > -1) {
+		const char *nstart;
+
+		nstart = stepback(start, dp, g->moffset);
+		if (nstart != NULL)
+			start = nstart;
+	}
+
+	SP("mloop", m->st, *start);
 
 	/* this loop does only one repetition except for backrefs */
 	for (;;) {
-		endp = fast(m, start, stop, gf, gl);
+		endp = walk(m, start, stop, gf, gl, true);
 		if (endp == NULL) {		/* a miss */
 			error = REG_NOMATCH;
 			goto done;
@@ -248,11 +324,12 @@
 		assert(m->coldp != NULL);
 		for (;;) {
 			NOTE("finding start");
-			endp = slow(m, m->coldp, stop, gf, gl);
+			endp = walk(m, m->coldp, stop, gf, gl, false);
 			if (endp != NULL)
 				break;
 			assert(m->coldp < m->endp);
-			m->coldp++;
+			m->coldp += XMBRTOWC(NULL, m->coldp,
+			    (size_t)(m->endp - m->coldp), &m->mbs, 0);
 		}
 		if (nmatch == 1 && !g->backrefs)
 			break;		/* no further info needed */
@@ -266,20 +343,20 @@
 			goto done;
 		}
 		for (i = 1; i <= m->g->nsub; i++)
-			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
+			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
 			NOTE("dissecting");
 			dp = dissect(m, m->coldp, endp, gf, gl);
 		} else {
 			if (g->nplus > 0 && m->lastpos == NULL)
 				m->lastpos = malloc((g->nplus+1) *
-							sizeof(const char *));
+						sizeof(const char *));
 			if (g->nplus > 0 && m->lastpos == NULL) {
 				error = REG_ESPACE;
 				goto done;
 			}
 			NOTE("backref dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 		}
 		if (dp != NULL)
 			break;
@@ -291,7 +368,7 @@
 			if (dp != NULL || endp <= m->coldp)
 				break;		/* defeat */
 			NOTE("backoff");
-			endp = slow(m, m->coldp, endp-1, gf, gl);
+			endp = walk(m, m->coldp, endp-1, gf, gl, false);
 			if (endp == NULL)
 				break;		/* defeat */
 			/* try it on a shorter possibility */
@@ -302,7 +379,7 @@
 			}
 #endif
 			NOTE("backoff dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 		}
 		assert(dp == NULL || dp == endp);
 		if (dp != NULL)		/* found a shorter one */
@@ -310,7 +387,9 @@
 
 		/* despite initial appearances, there is no match here */
 		NOTE("false alarm");
-		start = m->coldp + 1;	/* recycle starting later */
+		/* recycle starting later */
+		start = m->coldp + XMBRTOWC(NULL, m->coldp,
+		    (size_t)(stop - m->coldp), &m->mbs, 0);
 		assert(start <= stop);
 	}
 
@@ -337,7 +416,7 @@
 		m->pmatch = NULL;
 	}
 	if (m->lastpos != NULL) {
-		free(m->lastpos);
+		free(__UNCONST(m->lastpos));
 		m->lastpos = NULL;
 	}
 	STATETEARDOWN(m);
@@ -349,29 +428,27 @@
  == static const char *dissect(struct match *m, const char *start, \
  ==	const char *stop, sopno startst, sopno stopst);
  */
-static const char *			/* == stop (success) always */
+static const char *		/* == stop (success) always */
 dissect(
-    struct match *m,
-    const char *start,
-    const char *stop,
-    sopno startst,
-    sopno stopst)
+	struct match *m,
+	const char *start,
+	const char *stop,
+	sopno startst,
+	sopno stopst)
 {
 	int i;
-	sopno ss;	/* start sop of current subRE */
-	sopno es;	/* end sop of current subRE */
-	const char *sp;	/* start of string matched by it */
-	const char *stp; /* string matched by it cannot pass here */
-	const char *rest; /* start of rest of string */
-	const char *tail; /* string unmatched by rest of RE */
-	sopno ssub;	/* start sop of subsubRE */
-	sopno esub;	/* end sop of subsubRE */
-	const char *ssp; /* start of string matched by subsubRE */
-	const char *sep; /* end of string matched by subsubRE */
-	const char *oldssp; /* previous ssp */
-#ifndef NDEBUG
-	const char *dp;
-#endif
+	sopno ss;		/* start sop of current subRE */
+	sopno es;		/* end sop of current subRE */
+	const char *sp;		/* start of string matched by it */
+	const char *stp;	/* string matched by it cannot pass here */
+	const char *rest;	/* start of rest of string */
+	const char *tail;	/* string unmatched by rest of RE */
+	sopno ssub;		/* start sop of subsubRE */
+	sopno esub;		/* end sop of subsubRE */
+	const char *ssp;	/* start of string matched by subsubRE */
+	const char *sep;	/* end of string matched by subsubRE */
+	const char *oldssp;	/* previous ssp */
+	const char *dp __unused;
 
 	_DIAGASSERT(m != NULL);
 	_DIAGASSERT(start != NULL);
@@ -400,16 +477,22 @@
 			assert(nope);
 			break;
 		case OCHAR:
-			sp++;
+			sp += XMBRTOWC(NULL, sp, (size_t)(stop - start),
+			    &m->mbs, 0);
 			break;
 		case OBOL:
 		case OEOL:
 		case OBOW:
 		case OEOW:
+		case OBOS:
+		case OEOS:
+		case OWBND:
+		case ONWBND:
 			break;
 		case OANY:
 		case OANYOF:
-			sp++;
+			sp += XMBRTOWC(NULL, sp, (size_t)(stop - start),
+			    &m->mbs, 0);
 			break;
 		case OBACK_:
 		case O_BACK:
@@ -420,10 +503,10 @@
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -433,13 +516,8 @@
 			ssub = ss + 1;
 			esub = es - 1;
 			/* did innards match? */
-			if (slow(m, sp, rest, ssub, esub) != NULL) {
-#ifdef NDEBUG
-				(void)
-#else
-				dp = 
-#endif
-				    dissect(m, sp, rest, ssub, esub);
+			if (walk(m, sp, rest, ssub, esub, false) != NULL) {
+				dp = dissect(m, sp, rest, ssub, esub);
 				assert(dp == rest);
 			} else		/* no */
 				assert(sp == rest);
@@ -449,10 +527,10 @@
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -464,7 +542,7 @@
 			ssp = sp;
 			oldssp = ssp;
 			for (;;) {	/* find last match of innards */
-				sep = slow(m, ssp, rest, ssub, esub);
+				sep = walk(m, ssp, rest, ssub, esub, false);
 				if (sep == NULL || sep == ssp)
 					break;	/* failed or matched null */
 				oldssp = ssp;	/* on to next try */
@@ -476,13 +554,8 @@
 				ssp = oldssp;
 			}
 			assert(sep == rest);	/* must exhaust substring */
-			assert(slow(m, ssp, sep, ssub, esub) == rest);
-#ifdef NDEBUG
-			(void)
-#else
-			dp =
-#endif
-			    dissect(m, ssp, sep, ssub, esub);
+			assert(walk(m, ssp, sep, ssub, esub, false) == rest);
+			dp = dissect(m, ssp, sep, ssub, esub);
 			assert(dp == sep);
 			sp = rest;
 			break;
@@ -490,10 +563,10 @@
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -504,7 +577,7 @@
 			esub = ss + OPND(m->g->strip[ss]) - 1;
 			assert(OP(m->g->strip[esub]) == OOR1);
 			for (;;) {	/* find first matching branch */
-				if (slow(m, sp, rest, ssub, esub) == rest)
+				if (walk(m, sp, rest, ssub, esub, false) == rest)
 					break;	/* it matched all of it */
 				/* that one missed, try next one */
 				assert(OP(m->g->strip[esub]) == OOR1);
@@ -517,12 +590,7 @@
 				else
 					assert(OP(m->g->strip[esub]) == O_CH);
 			}
-#ifdef NDEBUG
-			(void)
-#else
-			dp =
-#endif
-			    dissect(m, sp, rest, ssub, esub);
+			dp = dissect(m, sp, rest, ssub, esub);
 			assert(dp == rest);
 			sp = rest;
 			break;
@@ -553,6 +621,17 @@
 	return(sp);
 }
 
+#define	ISBOW(m, sp)	/* sp sits on the first character of a word */	\
+    ((sp) < (m)->endp && ISWORD(*(sp)) &&		\
+    (((sp) == (m)->beginp && !((m)->eflags&REG_NOTBOL)) ||	\
+    ((sp) > (m)->offp && !ISWORD(*((sp)-1)))))
+#define	ISEOW(m, sp)	/* sp sits just past the last character of a word */	\
+    ((((sp) == (m)->endp && !((m)->eflags&REG_NOTEOL)) ||	\
+    ((sp) < (m)->endp && *(sp) == '\n' &&		\
+    ((m)->g->cflags&REG_NEWLINE)) ||			\
+    ((sp) < (m)->endp && !ISWORD(*(sp)))) &&		\
+    ((sp) > (m)->beginp && ISWORD(*((sp)-1))))
+
 /*
  - backref - figure out what matched what, figuring in back references
  == static const char *backref(struct match *m, const char *start, \
@@ -560,25 +639,27 @@
  */
 static const char *		/* == stop (success) or NULL (failure) */
 backref(
-    struct match *m,
-    const char *start,
-    const char *stop,
-    sopno startst,
-    sopno stopst,
-    sopno lev)			/* PLUS nesting level */
+	struct match *m,
+	const char *start,
+	const char *stop,
+	sopno startst,
+	sopno stopst,
+	sopno lev,		/* PLUS nesting level */
+	int rec)
 {
 	int i;
-	sopno ss;	/* start sop of current subRE */
-	const char *sp;	/* start of string matched by it */
-	sopno ssub;	/* start sop of subsubRE */
-	sopno esub;	/* end sop of subsubRE */
-	const char *ssp; /* start of string matched by subsubRE */
+	sopno ss;		/* start sop of current subRE */
+	const char *sp;		/* start of string matched by it */
+	sopno ssub;		/* start sop of subsubRE */
+	sopno esub;		/* end sop of subsubRE */
+	const char *ssp;	/* start of string matched by subsubRE */
 	const char *dp;
 	size_t len;
 	int hard;
 	sop s;
 	regoff_t offsave;
 	cset *cs;
+	wint_t wc;
 
 	_DIAGASSERT(m != NULL);
 	_DIAGASSERT(start != NULL);
@@ -592,23 +673,46 @@
 	for (ss = startst; !hard && ss < stopst; ss++)
 		switch (OP(s = m->g->strip[ss])) {
 		case OCHAR:
-			if (sp == stop || *sp++ != (char)OPND(s))
+			if (sp == stop)
+				return(NULL);
+			sp += XMBRTOWC(&wc, sp, (size_t)(stop - sp),
+			    &m->mbs, BADCHAR);
+			if (wc != (wint_t)OPND(s))
 				return(NULL);
 			break;
 		case OANY:
 			if (sp == stop)
 				return(NULL);
-			sp++;
+			sp += XMBRTOWC(&wc, sp, (size_t)(stop - sp),
+			    &m->mbs, BADCHAR);
+			if (wc == BADCHAR)
+				return (NULL);
 			break;
 		case OANYOF:
+			if (sp == stop)
+				return (NULL);
 			cs = &m->g->sets[OPND(s)];
-			if (sp == stop || !CHIN(cs, *sp++))
+			sp += XMBRTOWC(&wc, sp, (size_t)(stop - sp),
+			    &m->mbs, BADCHAR);
+			if (wc == BADCHAR || !CHIN(cs, wc))
+				return(NULL);
+			break;
+		case OBOS:
+			if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OEOS:
+			if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
+				{ /* yes */ }
+			else
 				return(NULL);
 			break;
 		case OBOL:
-			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
-					(sp < m->endp && *(sp-1) == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) )
+			if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+			    (sp > m->offp && sp < m->endp &&
+			    *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -621,23 +725,29 @@
 			else
 				return(NULL);
 			break;
+		case OWBND:
+			if (ISBOW(m, sp) || ISEOW(m, sp))
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case ONWBND:
+			if (((sp == m->beginp) && !ISWORD(*sp)) ||
+			    (sp == m->endp && !ISWORD(*(sp - 1))))
+				{ /* yes, beginning/end of subject */ }
+			else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
+				{ /* yes, same word-ness on both sides */ }
+			else
+				return(NULL);
+			break;
 		case OBOW:
-			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
-					(sp < m->endp && *(sp-1) == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) ||
-					(sp > m->beginp &&
-							!ISWORD(*(sp-1))) ) &&
-					(sp < m->endp && ISWORD(*sp)) )
+			if (ISBOW(m, sp))
 				{ /* yes */ }
 			else
 				return(NULL);
 			break;
 		case OEOW:
-			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
-					(sp < m->endp && *sp == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) ||
-					(sp < m->endp && !ISWORD(*sp)) ) &&
-					(sp > m->beginp && ISWORD(*(sp-1))) )
+			if (ISEOW(m, sp))
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -671,50 +781,47 @@
 	case OBACK_:		/* the vilest depths */
 		i = OPND(s);
 		assert(0 < i && i <= m->g->nsub);
-		if (m->pmatch[i].rm_eo == (regoff_t)-1)
+		if (m->pmatch[i].rm_eo == -1)
 			return(NULL);
-		assert(m->pmatch[i].rm_so != (regoff_t)-1);
-		len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
-		if (len == 0)
+		assert(m->pmatch[i].rm_so != -1);
+		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+		if (len == 0 && rec++ > MAX_RECURSION)
 			return(NULL);
 		assert(stop - m->beginp >= len);
 		if (sp > stop - len)
 			return(NULL);	/* not enough left to match */
-		ssp = m->offp + (size_t)m->pmatch[i].rm_so;
+		ssp = m->offp + m->pmatch[i].rm_so;
 		if (memcmp(sp, ssp, len) != 0)
 			return(NULL);
 		while (m->g->strip[ss] != SOP(O_BACK, i))
 			ss++;
-		return(backref(m, sp+len, stop, ss+1, stopst, lev));
-
+		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
 	case OQUEST_:		/* to null or not */
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 		if (dp != NULL)
 			return(dp);	/* not */
-		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
-
+		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
 	case OPLUS_:
 		assert(m->lastpos != NULL);
 		assert(lev+1 <= m->g->nplus);
 		m->lastpos[lev+1] = sp;
-		return(backref(m, sp, stop, ss+1, stopst, lev+1));
-
+		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
 	case O_PLUS:
 		if (sp == m->lastpos[lev])	/* last pass matched null */
-			return(backref(m, sp, stop, ss+1, stopst, lev-1));
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 		/* try another pass */
 		m->lastpos[lev] = sp;
-		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
+		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
 		if (dp == NULL)
-			dp = backref(m, sp, stop, ss+1, stopst, lev-1);
-		return(dp);
-
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+		else
+			return(dp);
 	case OCH_:		/* find the right one, if any */
 		ssub = ss + 1;
 		esub = ss + OPND(s) - 1;
 		assert(OP(m->g->strip[esub]) == OOR1);
 		for (;;) {	/* find first matching branch */
-			dp = backref(m, sp, stop, ssub, esub, lev);
+			dp = backref(m, sp, stop, ssub, esub, lev, rec);
 			if (dp != NULL)
 				return(dp);
 			/* that one missed, try next one */
@@ -729,29 +836,28 @@
 			else
 				assert(OP(m->g->strip[esub]) == O_CH);
 		}
-
+		/* NOTREACHED */
+		break;
 	case OLPAREN:		/* must undo assignment if rest fails */
 		i = OPND(s);
 		assert(0 < i && i <= m->g->nsub);
 		offsave = m->pmatch[i].rm_so;
 		m->pmatch[i].rm_so = sp - m->offp;
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 		if (dp != NULL)
 			return(dp);
 		m->pmatch[i].rm_so = offsave;
 		return(NULL);
-
 	case ORPAREN:		/* must undo assignment if rest fails */
 		i = OPND(s);
 		assert(0 < i && i <= m->g->nsub);
 		offsave = m->pmatch[i].rm_eo;
 		m->pmatch[i].rm_eo = sp - m->offp;
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 		if (dp != NULL)
 			return(dp);
 		m->pmatch[i].rm_eo = offsave;
 		return(NULL);
-
 	default:		/* uh oh */
 		assert(nope);
 		break;
@@ -760,141 +866,66 @@
 	/* "can't happen" */
 	assert(nope);
 	/* NOTREACHED */
-	return NULL;
+	return "shut up gcc";
 }
 
 /*
- - fast - step through the string at top speed
- == static const char *fast(struct match *m, const char *start, \
- ==	const char *stop, sopno startst, sopno stopst);
+ - walk - step through the string either quickly or slowly
+ == static const char *walk(struct match *m, const char *start, \
+ ==	const char *stop, sopno startst, sopno stopst, bool fast);
  */
-static const char *		/* where tentative match ended, or NULL */
-fast(
-    struct match *m,
-    const char *start,
-    const char *stop,
-    sopno startst,
-    sopno stopst)
+static const char * /* where it ended, or NULL */
+walk(struct match *m, const char *start, const char *stop, sopno startst,
+	sopno stopst, bool fast)
 {
 	states st = m->st;
 	states fresh = m->fresh;
-	states tmp = m->tmp;
-	const char *p = start;
-	int c = (start == m->beginp) ? OUT : *(start-1);
-	int lastc;	/* previous c */
-	int flagch;
-	size_t i;
-	const char *coldp; /* last p after which no match was underway */
-
-	_DIAGASSERT(m != NULL);
-	_DIAGASSERT(start != NULL);
-	_DIAGASSERT(stop != NULL);
-
-	CLEAR(st);
-	SET1(st, startst);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
-	ASSIGN(fresh, st);
-	SP("start", st, *p);
-	coldp = NULL;
-	for (;;) {
-		/* next character */
-		lastc = c;
-		c = (p == m->endp) ? OUT : *p;
-		if (EQ(st, fresh))
-			coldp = p;
-
-		/* is there an EOL and/or BOL between lastc and c? */
-		flagch = '\0';
-		i = 0;
-		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
-				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
-			flagch = BOL;
-			i = m->g->nbol;
-		}
-		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
-				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
-			flagch = (flagch == BOL) ? BOLEOL : EOL;
-			i += m->g->neol;
-		}
-		if (i != 0) {
-			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boleol", st, c);
-		}
-
-		/* how about a word boundary? */
-		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-					(c != OUT && ISWORD(c)) ) {
-			flagch = BOW;
-		}
-		if ( (lastc != OUT && ISWORD(lastc)) &&
-				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
-			flagch = EOW;
-		}
-		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boweow", st, c);
-		}
-
-		/* are we done? */
-		if (ISSET(st, stopst) || p == stop)
-			break;		/* NOTE BREAK OUT */
-
-		/* no, we must deal with this character */
-		ASSIGN(tmp, st);
-		ASSIGN(st, fresh);
-		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
-		SP("aft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
-		p++;
-	}
-
-	assert(coldp != NULL);
-	m->coldp = coldp;
-	if (ISSET(st, stopst))
-		return(p+1);
-	else
-		return(NULL);
-}
-
-/*
- - slow - step through the string more deliberately
- == static const char *slow(struct match *m, const char *start, \
- ==	const char *stop, sopno startst, sopno stopst);
- */
-static const char *			/* where it ended */
-slow(
-    struct match *m,
-    const char *start,
-    const char *stop,
-    sopno startst,
-    sopno stopst)
-{
-	states st = m->st;
 	states empty = m->empty;
 	states tmp = m->tmp;
 	const char *p = start;
-	int c = (start == m->beginp) ? OUT : *(start-1);
-	int lastc;	/* previous c */
-	int flagch;
-	size_t i;
+	wint_t c;
+	wint_t lastc;		/* previous c */
+	wint_t flagch;
+	int sflags;
 	const char *matchp;	/* last p at which a match ended */
+	size_t i, clen;
 
 	_DIAGASSERT(m != NULL);
 	_DIAGASSERT(start != NULL);
 	_DIAGASSERT(stop != NULL);
 
-	AT("slow", start, stop, startst, stopst);
+	sflags = 0;
+	AT("walk", start, stop, startst, stopst);
 	CLEAR(st);
 	SET1(st, startst);
 	SP("sstart", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
+	st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
+	if (fast)
+		ASSIGN(fresh, st);
 	matchp = NULL;
+	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
+		c = OUT;
+	else {
+		/*
+		 * XXX Wrong if the previous character was multi-byte.
+		 * Newline never is (in encodings supported by FreeBSD),
+		 * so this only breaks the ISWORD tests below.
+		 */
+		c = (uch)*(start - 1);
+	}
 	for (;;) {
 		/* next character */
 		lastc = c;
-		c = (p == m->endp) ? OUT : *p;
+		sflags = 0;
+		if (p == m->endp) {
+			c = OUT;
+			clen = 0;
+		} else
+			clen = XMBRTOWC(&c, p, (size_t)(m->endp - p),
+			    &m->mbs, BADCHAR);
+
+		if (fast && EQ(st, fresh))
+			matchp = p;
 
 		/* is there an EOL and/or BOL between lastc and c? */
 		flagch = '\0';
@@ -909,9 +940,20 @@
 			flagch = (flagch == BOL) ? BOLEOL : EOL;
 			i += m->g->neol;
 		}
+		if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
+			sflags |= SBOS;
+			/* Step one more for BOS. */
+			i++;
+		}
+		if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
+			sflags |= SEOS;
+			/* Step one more for EOS. */
+			i++;
+		}
 		if (i != 0) {
 			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
+				st = step(m->g, startst, stopst, st, flagch, st,
+				    sflags);
 			SP("sboleol", st, c);
 		}
 
@@ -925,52 +967,78 @@
 			flagch = EOW;
 		}
 		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
+			st = step(m->g, startst, stopst, st, flagch, st, sflags);
 			SP("sboweow", st, c);
 		}
+		if (lastc != OUT && c != OUT &&
+		    ISWORD(lastc) == ISWORD(c)) {
+			flagch = NWBND;
+		} else if ((lastc == OUT && !ISWORD(c)) ||
+		    (c == OUT && !ISWORD(lastc))) {
+			flagch = NWBND;
+		}
+		if (flagch == NWBND) {
+			st = step(m->g, startst, stopst, st, flagch, st, sflags);
+			SP("snwbnd", st, c);
+		}
 
 		/* are we done? */
-		if (ISSET(st, stopst))
-			matchp = p;
-		if (EQ(st, empty) || p == stop)
+		if (ISSET(st, stopst)) {
+			if (fast)
+				break;
+			else
+				matchp = p;
+		}
+		if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
 			break;		/* NOTE BREAK OUT */
 
 		/* no, we must deal with this character */
 		ASSIGN(tmp, st);
-		ASSIGN(st, empty);
+		if (fast)
+			ASSIGN(st, fresh);
+		else
+			ASSIGN(st, empty);
 		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
+		st = step(m->g, startst, stopst, tmp, c, st, sflags);
 		SP("saft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
-		p++;
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
+		    st));
+		p += clen;
 	}
 
-	return(matchp);
+	if (fast) {
+		assert(matchp != NULL);
+		m->coldp = matchp;
+		if (ISSET(st, stopst))
+			return (p + XMBRTOWC(NULL, p, (size_t)(stop - p),
+			    &m->mbs, 0));
+		else
+			return (NULL);
+	} else
+		return (matchp);
 }
 
-
 /*
  - step - map set of states reachable before char to set reachable after
  == static states step(struct re_guts *g, sopno start, sopno stop, \
  ==	states bef, int ch, states aft);
- == #define	BOL	(OUT+1)
- == #define	EOL	(BOL+1)
- == #define	BOLEOL	(BOL+2)
- == #define	NOTHING	(BOL+3)
- == #define	BOW	(BOL+4)
- == #define	EOW	(BOL+5)
- == #define	CODEMAX	(BOL+5)		// highest code used
- == #define	NONCHAR(c)	((c) > CHAR_MAX)
- == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
+ == #define	BOL	(OUT-1)
+ == #define	EOL	(BOL-1)
+ == #define	BOLEOL	(BOL-2)
+ == #define	NOTHING	(BOL-3)
+ == #define	BOW	(BOL-4)
+ == #define	EOW	(BOL-5)
+ == #define	BADCHAR	(BOL-6)
+ == #define	NONCHAR(c)	((c) <= OUT)
  */
 static states
-step(
-    struct re_guts *g,
-    sopno start,		/* start state within strip */
-    sopno stop,			/* state after stop state within strip */
-    states bef,			/* states reachable before */
-    int ch,			/* character or NONCHAR code */
-    states aft)			/* states already known reachable after */
+step(struct re_guts *g,
+	sopno start,		/* start state within strip */
+	sopno stop,		/* state after stop state within strip */
+	states bef,		/* states reachable before */
+	wint_t ch,		/* character or NONCHAR code */
+	states aft,		/* states already known reachable after */
+	int sflags)		/* state flags */
 {
 	cset *cs;
 	sop s;
@@ -989,8 +1057,16 @@
 			break;
 		case OCHAR:
 			/* only characters can match */
-			assert(!NONCHAR(ch) || ch != (char)OPND(s));
-			if (ch == (char)OPND(s))
+			assert(!NONCHAR(ch) || ch != OPND(s));
+			if (ch == (wint_t)OPND(s))
+				FWD(aft, bef, 1);
+			break;
+		case OBOS:
+			if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
+				FWD(aft, bef, 1);
+			break;
+		case OEOS:
+			if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
 				FWD(aft, bef, 1);
 			break;
 		case OBOL:
@@ -1009,6 +1085,14 @@
 			if (ch == EOW)
 				FWD(aft, bef, 1);
 			break;
+		case OWBND:
+			if (ch == BOW || ch == EOW)
+				FWD(aft, bef, 1);
+			break;
+		case ONWBND:
+			if (ch == NWBND)
+				FWD(aft, aft, 1);
+			break;
 		case OANY:
 			if (!NONCHAR(ch))
 				FWD(aft, bef, 1);
@@ -1054,10 +1138,10 @@
 		case OOR1:		/* done a branch, find the O_CH */
 			if (ISSTATEIN(aft, here)) {
 				for (look = 1;
-						OP(s = g->strip[pc+look]) != O_CH;
-						look += OPND(s))
+				    OP(s = g->strip[pc+look]) != O_CH;
+				    look += OPND(s))
 					assert(OP(s) == OOR2);
-				FWD(aft, aft, look);
+				FWD(aft, aft, look + 1);
 			}
 			break;
 		case OOR2:		/* propagate OCH_'s marking */
@@ -1083,20 +1167,19 @@
 /*
  - print - print a set of states
  == #ifdef REDEBUG
- == static void print(struct match *m, char *caption, states st, \
+ == static void print(struct match *m, const char *caption, states st, \
  ==	int ch, FILE *d);
  == #endif
  */
 static void
-print(
-    struct match *m,
-    char *caption,
-    states st,
-    int ch,
-    FILE *d)
+print(struct match *m,
+	const char *caption,
+	states st,
+	int ch,
+	FILE *d)
 {
 	struct re_guts *g = m->g;
-	int i;
+	sopno i;
 	int first = 1;
 
 	_DIAGASSERT(m != NULL);
@@ -1112,27 +1195,26 @@
 		fprintf(d, " %s", pchar(ch));
 	for (i = 0; i < g->nstates; i++)
 		if (ISSET(st, i)) {
-			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+			fprintf(d, "%s%lu", (first) ? "\t" : ", ", i);
 			first = 0;
 		}
 	fprintf(d, "\n");
 }
 
-/* 
+/*
  - at - print current situation
  == #ifdef REDEBUG
- == static void at(struct match *m, char *title, char *start, char *stop, \
- ==						sopno startst, sopno stopst);
+ == static void at(struct match *m, const char *title, const char *start, \
+ ==			 const char *stop, sopno startst, sopno stopst);
  == #endif
  */
 static void
-at(
-    struct match *m,
-    char *title,
-    char *start,
-    char *stop,
-    sopno startst,
-    sopno stopst)
+at(	struct match *m,
+	const char *title,
+	const char *start,
+	const char *stop,
+	sopno startst,
+	sopno stopst)
 {
 
 	_DIAGASSERT(m != NULL);
@@ -1153,7 +1235,7 @@
 /*
  - pchar - make a character printable
  == #ifdef REDEBUG
- == static char *pchar(int ch);
+ == static const char *pchar(int ch);
  == #endif
  *
  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
@@ -1161,28 +1243,26 @@
  * a matching debug.o, and this is convenient.  It all disappears in
  * the non-debug compilation anyway, so it doesn't matter much.
  */
-static char *			/* -> representation */
-pchar(
-    int ch)
+static const char *		/* -> representation */
+pchar(int ch)
 {
 	static char pbuf[10];
 
-	if (isprint(ch) || ch == ' ')
-		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+	if (isprint((uch)ch) || ch == ' ')
+		snprintf(pbuf, sizeof(pbuf), "%c", ch);
 	else
-		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+		snprintf(pbuf, sizeof(pbuf), "\\%o", ch);
 	return(pbuf);
 }
 #endif
 #endif
 
+#undef	stepback
 #undef	matcher
-#undef	fast
-#undef	slow
+#undef	walk
 #undef	dissect
 #undef	backref
 #undef	step
 #undef	print
 #undef	at
 #undef	match
-#undef	nope

diff --git a/libc/upstream-netbsd/lib/libc/regex/regcomp.c b/libc/upstream-netbsd/lib/libc/regex/regcomp.c
index 4a0d99a..957f8ac 100644
--- a/libc/upstream-netbsd/lib/libc/regex/regcomp.c
+++ b/libc/upstream-netbsd/lib/libc/regex/regcomp.c

@@ -1,9 +1,17 @@
-/*	$NetBSD: regcomp.c,v 1.38 2019/02/07 22:22:31 christos Exp $	*/
+/*	$NetBSD: regcomp.c,v 1.46 2021/03/11 15:00:29 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * This code is derived from software contributed to Berkeley by
  * Henry Spencer.
  *
@@ -34,74 +42,65 @@
  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
- */
+#if HAVE_NBTOOL_CONFIG_H
+#include "nbtool_config.h"
+#endif
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
-#else
-__RCSID("$NetBSD: regcomp.c,v 1.38 2019/02/07 22:22:31 christos Exp $");
+__FBSDID("$FreeBSD: head/lib/libc/regex/regcomp.c 368359 2020-12-05 03:18:48Z kevans $");
 #endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: regcomp.c,v 1.46 2021/03/11 15:00:29 christos Exp $");
+
+#define _OPENBSD_SOURCE
+
+#ifndef LIBHACK
+#define REGEX_GNU_EXTENSIONS
 
 #include "namespace.h"
+#endif
 #include <sys/types.h>
-
-#include <assert.h>
+#include <stdio.h>
+#include <string.h>
 #include <ctype.h>
 #include <limits.h>
-#include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
 #include <regex.h>
+#include <stdbool.h>
 
-#ifdef __weak_alias
+#if defined(__weak_alias) && !defined(LIBHACK)
 __weak_alias(regcomp,_regcomp)
 #endif
 
+#ifdef REGEX_LIBC_COLLATE
+#include "collate.h"
+#endif
+
 #include "utils.h"
 #include "regex2.h"
 
-#include "cclass.h"
 #include "cname.h"
 
 /*
+ * Branching context, used to keep track of branch state for all of the branch-
+ * aware functions. In addition to keeping track of branch positions for the
+ * p_branch_* functions, we use this to simplify some clumsiness in BREs for
+ * detection of whether ^ is acting as an anchor or being used erroneously and
+ * also for whether we're in a sub-expression or not.
+ */
+struct branchc {	/* passed to the p_branch_*() helpers and to struct parse's parse_expr/pre_parse/post_parse hooks */
+	sopno start;
+	sopno back;
+	sopno fwd;
+
+	int nbranch;
+	int nchain;
+	bool outer;
+	bool terminate;
+};
+
+/*
  * parse structure, passed up and down to avoid global variables and
  * other clumsinesses
  */
@@ -109,6 +108,7 @@
 	const char *next;	/* next character in RE */
 	const char *end;	/* end of string (-> NUL normally) */
 	int error;		/* has an error been seen? */
+	int gnuext;
 	sop *strip;		/* malloced strip */
 	sopno ssize;		/* malloced strip size (allocated) */
 	sopno slen;		/* malloced strip length (used) */
@@ -117,56 +117,70 @@
 #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
+	bool allowbranch;	/* can this expression branch? */
+	bool bre;		/* convenience; is this a BRE? */
+	int pflags;		/* other parsing flags -- legacy escapes? */
+	bool (*parse_expr)(struct parse *, struct branchc *);
+	void (*pre_parse)(struct parse *, struct branchc *);
+	void (*post_parse)(struct parse *, struct branchc *);
 };
 
+#define PFLAG_LEGACY_ESC	0x00000001
+
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /* === regcomp.c === */
-static void p_ere(struct parse *p, int stop, size_t reclimit);
-static void p_ere_exp(struct parse *p, size_t reclimit);
+static bool p_ere_exp(struct parse *p, struct branchc *bc);
 static void p_str(struct parse *p);
-static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
-static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
+static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
+static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
+static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
+static bool p_branch_empty(struct parse *p, struct branchc *bc);
+static bool p_branch_do(struct parse *p, struct branchc *bc);
+static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
+static void p_bre_post_parse(struct parse *p, struct branchc *bc);
+static void p_re(struct parse *p, int end1, int end2);
+static bool p_simp_re(struct parse *p, struct branchc *bc);
 static int p_count(struct parse *p);
 static void p_bracket(struct parse *p);
+static int p_range_cmp(wchar_t c1, wchar_t c2);
 static void p_b_term(struct parse *p, cset *cs);
+#ifdef REGEX_GNU_EXTENSIONS
+static int p_b_pseudoclass(struct parse *p, char c);
+#endif
 static void p_b_cclass(struct parse *p, cset *cs);
+static void p_b_cclass_named(struct parse *p, cset *cs, const char[]);
 static void p_b_eclass(struct parse *p, cset *cs);
-static char p_b_symbol(struct parse *p);
-static char p_b_coll_elem(struct parse *p, int endc);
-static int othercase(int ch);
-static void bothcases(struct parse *p, int ch);
-static void ordinary(struct parse *p, int ch);
+static wint_t p_b_symbol(struct parse *p);
+static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
+static bool may_escape(struct parse *p, const wint_t ch);
+static wint_t othercase(wint_t ch);
+static void bothcases(struct parse *p, wint_t ch);
+static void ordinary(struct parse *p, wint_t ch);
 static void nonnewline(struct parse *p);
-static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
+static void repeat(struct parse *p, sopno start, int from, int to);
 static int seterr(struct parse *p, int e);
 static cset *allocset(struct parse *p);
 static void freeset(struct parse *p, cset *cs);
-static sopno freezeset(struct parse *p, cset *cs);
-static int firstch(struct parse *p, cset *cs);
-static int nch(struct parse *p, cset *cs);
-static void mcadd(struct parse *p, cset *cs, const char *cp);
-#if 0
-static void mcsub(cset *cs, char *cp);
-static int mcin(cset *cs, char *cp);
-static char *mcfind(cset *cs, char *cp);
-#endif
-static void mcinvert(struct parse *p, cset *cs);
-static void mccase(struct parse *p, cset *cs);
-static int isinsets(struct re_guts *g, int c);
-static int samesets(struct re_guts *g, int c1, int c2);
-static void categorize(struct parse *p, struct re_guts *g);
+static void CHadd(struct parse *p, cset *cs, wint_t ch);
+static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
+static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
+static wint_t singleton(cset *cs);
 static sopno dupl(struct parse *p, sopno start, sopno finish);
-static void doemit(struct parse *p, sop op, sopno opnd);
-static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
-static void dofwd(struct parse *p, sopno pos, sopno value);
+static void doemit(struct parse *p, sop op, size_t opnd);
+static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
+static void dofwd(struct parse *p, sopno pos, sop value);
 static int enlarge(struct parse *p, sopno size);
 static void stripsnug(struct parse *p, struct re_guts *g);
 static void findmust(struct parse *p, struct re_guts *g);
+static int altoffset(sop *scan, int offset);
+static void computejumps(struct parse *p, struct re_guts *g);
+static void computematchjumps(struct parse *p, struct re_guts *g);
 static sopno pluscount(struct parse *p, struct re_guts *g);
+static wint_t wgetnext(struct parse *p);
 
 #ifdef __cplusplus
 }
@@ -185,19 +199,22 @@
 #define	MORE2()	(p->next+1 < p->end)
 #define	SEE(c)	(MORE() && PEEK() == (c))
 #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
 #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
 #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define	EATSPEC(a)	(p->bre ? EATTWO('\\', a) : EAT(a))
 #define	NEXT()	(p->next++)
 #define	NEXT2()	(p->next += 2)
 #define	NEXTn(n)	(p->next += (n))
 #define	GETNEXT()	(*p->next++)
+#define	WGETNEXT()	wgetnext(p)
 #define	SETERROR(e)	seterr(p, (e))
-#define	REQUIRE(co, e)	(void) ((co) || SETERROR(e))
+#define	REQUIRE(co, e)	((co) || SETERROR(e))
 #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
-#define	MUSTEAT(c, e)	(void) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
 #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
-#define	EMIT(op, sopnd)	doemit(p, (sop)(op), sopnd)
-#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define	EMIT(op, sopnd)	doemit(p, (op), (sopnd))
+#define	INSERT(op, pos)	doinsert(p, (op), HERE()-(pos)+1, pos)
 #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
 #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
 #define	HERE()		(p->slen)
@@ -205,42 +222,62 @@
 #define	THERETHERE()	(p->slen - 2)
 #define	DROP(n)	(p->slen -= (n))
 
-#ifndef NDEBUG
-static int never = 0;		/* for use in asserts; shuts lint up */
-#else
-#define	never	0		/* some <assert.h>s have bugs too */
+/* Macro used by computejumps()/computematchjumps() */
+#ifndef MIN
+#define MIN(a,b)	((a)<(b)?(a):(b))
 #endif
 
-#define	MEMLIMIT	0x8000000
-#define MEMSIZE(p) \
-	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
-	(p)->ncsalloc * sizeof(cset) + \
-	(p)->ssize * sizeof(sop))
-#define	RECLIMIT	256
+#ifndef NLS
+static const struct {
+	const char *name;
+	int (*func)(int);
+} wctypes[] = {
+#define ADD(x) { .name = # x, .func = is ## x }
+	ADD(alnum),
+	ADD(alpha),
+	ADD(blank),
+	ADD(cntrl),
+	ADD(digit),
+	ADD(graph),
+	ADD(lower),
+	ADD(print),
+	ADD(punct),
+	ADD(space),
+	ADD(upper),
+	ADD(xdigit),
+#undef ADD
+};
 
-/*
- - regcomp - interface for parser and compilation
- = extern int regcomp(regex_t *, const char *, int);
- = #define	REG_BASIC	0000
- = #define	REG_EXTENDED	0001
- = #define	REG_ICASE	0002
- = #define	REG_NOSUB	0004
- = #define	REG_NEWLINE	0010
- = #define	REG_NOSPEC	0020
- = #define	REG_PEND	0040
- = #define	REG_DUMP	0200
- */
-int				/* 0 success, otherwise REG_something */
-regcomp(
-    regex_t *preg,
-    const char *pattern,
-    int cflags)
+wctype_t
+__regex_wctype(const char *str)
+{
+	for (size_t i = 0; i < __arraycount(wctypes); i++) {
+		if (strcmp(wctypes[i].name, str) == 0)
+			return (wctype_t)(i + 1);
+	}
+	return (wctype_t)0;
+}
+
+int
+__regex_iswctype(wint_t c, wctype_t ct)
+{
+	if (ct == 0)
+		return 0;
+	return (*wctypes[ct - 1].func)(c);
+}
+#endif
+
+static int				/* 0 success, otherwise REG_something */
+regcomp_internal(regex_t * __restrict preg,
+	const char * __restrict pattern,
+	int cflags, int pflags)
 {
 	struct parse pa;
 	struct re_guts *g;
 	struct parse *p = &pa;
 	int i;
 	size_t len;
+	size_t maxlen;
 #ifdef REDEBUG
 #	define	GOODFLAGS(f)	(f)
 #else
@@ -262,11 +299,27 @@
 		len = strlen(pattern);
 
 	/* do the mallocs early so failure handling is easy */
-	g = malloc(sizeof(struct re_guts) + (NC - 1) * sizeof(cat_t));
+	g = malloc(sizeof(*g));
 	if (g == NULL)
 		return(REG_ESPACE);
-	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
-	p->strip = calloc(p->ssize, sizeof(sop));
+	/*
+	 * Limit the pattern space to avoid a 32-bit overflow on buffer
+	 * extension.  Also avoid any signed overflow in case of conversion
+	 * so make the real limit based on a 31-bit overflow.
+	 *
+	 * Likely not applicable on 64-bit systems but handle the case
+	 * generically (who are we to stop people from using ~715MB+
+	 * patterns?).
+	 */
+	maxlen = ((size_t)-1 >> 1) / sizeof(*p->strip) * 2 / 3;
+	if (len >= maxlen) {
+		free(g);
+		return(REG_ESPACE);
+	}
+	p->ssize = (sopno)(len / 2 * 3 + 1);	/* ugh */
+	assert(p->ssize >= len);
+
+	p->strip = calloc(p->ssize, sizeof(*p->strip));
 	p->slen = 0;
 	if (p->strip == NULL) {
 		free(g);
@@ -275,46 +328,74 @@
 
 	/* set things up */
 	p->g = g;
-	p->next = pattern;
+	p->next = pattern;	/* convenience; we do not modify it */
 	p->end = p->next + len;
 	p->error = 0;
 	p->ncsalloc = 0;
+	p->pflags = pflags;
 	for (i = 0; i < NPAREN; i++) {
 		p->pbegin[i] = 0;
 		p->pend[i] = 0;
 	}
-	g->csetsize = NC;
+#ifdef REGEX_GNU_EXTENSIONS
+	if ((cflags & REG_GNU) == 0) {
+		p->gnuext = false;
+		p->allowbranch = (cflags & REG_EXTENDED) != 0;
+	} else
+		p->gnuext = p->allowbranch = true;
+#else
+	p->gnuext = false;
+	p->allowbranch = (cflags & REG_EXTENDED) != 0;
+#endif
+	if (cflags & REG_EXTENDED) {
+		p->bre = false;
+		p->parse_expr = p_ere_exp;
+		p->pre_parse = NULL;
+		p->post_parse = NULL;
+	} else {
+		p->bre = true;
+		p->parse_expr = p_simp_re;
+		p->pre_parse = p_bre_pre_parse;
+		p->post_parse = p_bre_post_parse;
+	}
 	g->sets = NULL;
-	g->setbits = NULL;
 	g->ncsets = 0;
 	g->cflags = cflags;
 	g->iflags = 0;
 	g->nbol = 0;
 	g->neol = 0;
 	g->must = NULL;
+	g->moffset = -1;
+	g->charjump = NULL;
+	g->matchjump = NULL;
 	g->mlen = 0;
 	g->nsub = 0;
-	g->ncategories = 1;	/* category 0 is "everything else" */
-	g->categories = &g->catspace[-(CHAR_MIN)];
-	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
 	g->backrefs = 0;
 
 	/* do it */
 	EMIT(OEND, 0);
 	g->firststate = THERE();
-	if (cflags&REG_EXTENDED)
-		p_ere(p, OUT, 0);
-	else if (cflags&REG_NOSPEC)
+	if (cflags & REG_NOSPEC)
 		p_str(p);
 	else
-		p_bre(p, OUT, OUT, 0);
+		p_re(p, OUT, OUT);
 	EMIT(OEND, 0);
 	g->laststate = THERE();
 
 	/* tidy up loose ends and fill things in */
-	categorize(p, g);
 	stripsnug(p, g);
 	findmust(p, g);
+	/* only use Boyer-Moore algorithm if the pattern is bigger
+	 * than three characters
+	 */
+	if(g->mlen > 3) {
+		computejumps(p, g);
+		computematchjumps(p, g);
+		if(g->matchjump == NULL && g->charjump != NULL) {
+			free(g->charjump);
+			g->charjump = NULL;
+		}
+	}
 	g->nplus = pluscount(p, g);
 	g->magic = MAGIC2;
 	preg->re_nsub = g->nsub;
@@ -333,97 +414,72 @@
 }
 
 /*
- - p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(struct parse *p, int stop, size_t reclimit);
+ - regcomp - interface for parser and compilation
+ = extern int regcomp(regex_t *, const char *, int);
+ = #define	REG_BASIC	0000
+ = #define	REG_EXTENDED	0001
+ = #define	REG_ICASE	0002
+ = #define	REG_NOSUB	0004
+ = #define	REG_NEWLINE	0010
+ = #define	REG_NOSPEC	0020
+ = #define	REG_PEND	0040
+ = #define	REG_DUMP	0200
  */
-static void
-p_ere(
-    struct parse *p,
-    int stop,			/* character this ERE should end at */
-    size_t reclimit)
+int				/* 0 success, otherwise REG_something */
+regcomp(regex_t * __restrict preg,
+	const char * __restrict pattern,
+	int cflags)
 {
-	char c;
-	sopno prevback = 0;	/* pacify gcc */
-	sopno prevfwd = 0; 	/* pacify gcc */
-	sopno conc;
-	int first = 1;		/* is this the first alternative? */
 
-	_DIAGASSERT(p != NULL);
-
-	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
-		p->error = REG_ESPACE;
-		return;
-	}
-
-	for (;;) {
-		/* do a bunch of concatenated expressions */
-		conc = HERE();
-		while (MORE() && (c = PEEK()) != '|' && c != stop)
-			p_ere_exp(p, reclimit);
-		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
-
-		if (!EAT('|'))
-			break;		/* NOTE BREAK OUT */
-
-		if (first) {
-			INSERT(OCH_, conc);	/* offset is wrong */
-			prevfwd = conc;
-			prevback = conc;
-			first = 0;
-		}
-		ASTERN(OOR1, prevback);
-		prevback = THERE();
-		AHEAD(prevfwd);			/* fix previous offset */
-		prevfwd = HERE();
-		EMIT(OOR2, 0);			/* offset is very wrong */
-	}
-
-	if (!first) {		/* tail-end fixups */
-		AHEAD(prevfwd);
-		ASTERN(O_CH, prevback);
-	}
-
-	assert(!MORE() || SEE(stop));
+	return (regcomp_internal(preg, pattern, cflags, 0));
 }
 
 /*
- - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- == static void p_ere_exp(struct parse *p, size_t reclimit);
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
+ - return whether we should terminate or not
+ == static bool p_ere_exp(struct parse *p, struct branchc *bc);
  */
-static void
-p_ere_exp(
-    struct parse *p,
-    size_t reclimit)
+static bool
+p_ere_exp(struct parse *p, struct branchc *bc)
 {
 	char c;
+	wint_t wc;
 	sopno pos;
 	int count;
 	int count2;
+#ifdef REGEX_GNU_EXTENSIONS
+	size_t i;
+	int handled;
+#endif
 	sopno subno;
 	int wascaret = 0;
 
 	_DIAGASSERT(p != NULL);
 
+	(void)bc;
 	assert(MORE());		/* caller should have ensured this */
 	c = GETNEXT();
 
+#ifdef REGEX_GNU_EXTENSIONS
+	handled = 0;
+#endif
 	pos = HERE();
 	switch (c) {
 	case '(':
-		REQUIRE(MORE(), REG_EPAREN);
+		(void)REQUIRE(MORE(), REG_EPAREN);
 		p->g->nsub++;
-		subno = p->g->nsub;
+		subno = (sopno)p->g->nsub;
 		if (subno < NPAREN)
 			p->pbegin[subno] = HERE();
 		EMIT(OLPAREN, subno);
 		if (!SEE(')'))
-			p_ere(p, ')', reclimit);
+			p_re(p, ')', IGN);
 		if (subno < NPAREN) {
 			p->pend[subno] = HERE();
 			assert(p->pend[subno] != 0);
 		}
 		EMIT(ORPAREN, subno);
-		MUSTEAT(')', REG_EPAREN);
+		(void)MUSTEAT(')', REG_EPAREN);
 		break;
 #ifndef POSIX_MISTAKE
 	case ')':		/* happens only if no current unmatched ( */
@@ -454,6 +510,7 @@
 	case '*':
 	case '+':
 	case '?':
+	case '{':
 		SETERROR(REG_BADRPT);
 		break;
 	case '.':
@@ -466,30 +523,118 @@
 		p_bracket(p);
 		break;
 	case '\\':
-		REQUIRE(MORE(), REG_EESCAPE);
-		c = GETNEXT();
-		ordinary(p, c);
+		(void)REQUIRE(MORE(), REG_EESCAPE);
+		wc = WGETNEXT();
+#ifdef REGEX_GNU_EXTENSIONS
+		if (p->gnuext) {
+			handled = 1;
+			switch (wc) {
+			case '`':
+				EMIT(OBOS, 0);
+				break;
+			case '\'':
+				EMIT(OEOS, 0);
+				break;
+			case 'B':
+				EMIT(ONWBND, 0);
+				break;
+			case 'b':
+				EMIT(OWBND, 0);
+				break;
+			case 'W':
+			case 'w':
+			case 'S':
+			case 's':
+				p_b_pseudoclass(p, wc);
+				break;
+			case 'a':
+				ordinary(p, '\a');
+				break;
+			case 'e':
+				ordinary(p, '\e');
+				break;
+			case 'f':
+				ordinary(p, '\f');
+				break;
+			case 'n':
+				ordinary(p, '\n');
+				break;
+			case 'r':
+				ordinary(p, '\r');
+				break;
+			case 't':
+				ordinary(p, '\t');
+				break;
+			case 'v':
+				ordinary(p, '\v');
+				break;
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				i = wc - '0';
+				assert(i < NPAREN);
+				if (p->pend[i] != 0) {
+					assert(i <= p->g->nsub);
+					EMIT(OBACK_, i);
+					assert(p->pbegin[i] != 0);
+					assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+					assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+					(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+					EMIT(O_BACK, i);
+				} else
+					SETERROR(REG_ESUBREG);
+				p->g->backrefs = 1;
+				break;
+			default:
+				handled = 0;
+			}
+			/* Don't proceed to the POSIX bits if we've already handled it */
+			if (handled)
+				break;
+		}
+#endif
+		switch (wc) {
+		case '<':
+			EMIT(OBOW, 0);
+			break;
+		case '>':
+			EMIT(OEOW, 0);
+			break;
+		default:
+			if (may_escape(p, wc))
+				ordinary(p, wc);
+			else
+				SETERROR(REG_EESCAPE);
+			break;
+		}
 		break;
-	case '{':		/* okay as ordinary except if digit follows */
-		REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT);
-		/* FALLTHROUGH */
 	default:
 		if (p->error != 0)
-			return;
-		ordinary(p, c);
+			return (false);
+		p->next--;
+		wc = WGETNEXT();
+		ordinary(p, wc);
 		break;
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	/* we call { a repetition if followed by a digit */
-	if (!( c == '*' || c == '+' || c == '?' ||
-	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ))
-		return;		/* no repetition, we're done */
+	if (!( c == '*' || c == '+' || c == '?' || c == '{'))
+		return (false);		/* no repetition, we're done */
+	else if (c == '{')
+		(void)REQUIRE(MORE2() && \
+		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
 	NEXT();
 
-	REQUIRE(!wascaret, REG_BADRPT);
+	(void)REQUIRE(!wascaret, REG_BADRPT);
 	switch (c) {
 	case '*':	/* implemented as +? */
 		/* this case does not require the (y|) trick, noKLUDGE */
@@ -514,30 +659,31 @@
 	case '{':
 		count = p_count(p);
 		if (EAT(',')) {
-			if (isdigit((unsigned char)PEEK())) {
+			if (isdigit((uch)PEEK())) {
 				count2 = p_count(p);
-				REQUIRE(count <= count2, REG_BADBR);
+				(void)REQUIRE(count <= count2, REG_BADBR);
 			} else		/* single number with comma */
 				count2 = INFINITY;
 		} else		/* just a single number */
 			count2 = count;
-		repeat(p, pos, count, count2, 0);
+		repeat(p, pos, count, count2);
 		if (!EAT('}')) {	/* error heuristics */
 			while (MORE() && PEEK() != '}')
 				NEXT();
-			REQUIRE(MORE(), REG_EBRACE);
+			(void)REQUIRE(MORE(), REG_EBRACE);
 			SETERROR(REG_BADBR);
 		}
 		break;
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	if (!( c == '*' || c == '+' || c == '?' ||
-	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) )
-		return;
+				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+		return (false);
 	SETERROR(REG_BADRPT);
+	return (false);
 }
 
 /*
@@ -545,159 +691,350 @@
  == static void p_str(struct parse *p);
  */
 static void
-p_str(
-    struct parse *p)
+p_str(struct parse *p)
 {
-
-	_DIAGASSERT(p != NULL);
-
-	REQUIRE(MORE(), REG_EMPTY);
+	(void)REQUIRE(MORE(), REG_EMPTY);
 	while (MORE())
-		ordinary(p, GETNEXT());
+		ordinary(p, WGETNEXT());
 }
 
 /*
- - p_bre - BRE parser top level, anchoring and concatenation
- == static void p_bre(struct parse *p, int end1, \
- ==	int end2, size_t reclimit);
- * Giving end1 as OUT essentially eliminates the end1/end2 check.
- *
- * This implementation is a bit of a kludge, in that a trailing $ is first
- * taken as an ordinary character and then revised to be an anchor.  The
- * only undesirable side effect is that '$' gets included as a character
- * category in such cases.  This is fairly harmless; not worth fixing.
- * The amount of lookahead needed to avoid this kludge is excessive.
+ * Eat consecutive branch delimiters for the kind of expression that we are
+ * parsing, return the number of delimiters that we ate.
+ */
+static int
+p_branch_eat_delim(struct parse *p, struct branchc *bc)
+{
+	int nskip;
+
+	(void)bc;
+	nskip = 0;
+	while (EATSPEC('|'))
+		++nskip;
+	return (nskip);
+}
+
+/*
+ * Insert necessary branch book-keeping operations. This emits a
+ * bogus 'next' offset, since we still have more to parse
  */
 static void
-p_bre(
-    struct parse *p,
-    int end1,		/* first terminating character */
-    int end2,		/* second terminating character */
-    size_t reclimit)
+p_branch_ins_offset(struct parse *p, struct branchc *bc)
 {
-	sopno start;
-	int first = 1;			/* first subexpression? */
-	int wasdollar = 0;
 
-	_DIAGASSERT(p != NULL);
-
-	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
-		p->error = REG_ESPACE;
-		return;
+	if (bc->nbranch == 0) {
+		INSERT(OCH_, bc->start);	/* offset is wrong */
+		bc->fwd = bc->start;
+		bc->back = bc->start;
 	}
 
-	start = HERE();
+	ASTERN(OOR1, bc->back);
+	bc->back = THERE();
+	AHEAD(bc->fwd);			/* fix previous offset */
+	bc->fwd = HERE();
+	EMIT(OOR2, 0);			/* offset is very wrong */
+	++bc->nbranch;
+}
 
+/*
+ * Fix the offset of the tail branch, if we actually had any branches.
+ * This is to correct the bogus placeholder offset that we use.
+ */
+static void
+p_branch_fix_tail(struct parse *p, struct branchc *bc)
+{
+
+	/* Fix bogus offset at the tail if we actually have branches */
+	if (bc->nbranch > 0) {
+		AHEAD(bc->fwd);
+		ASTERN(O_CH, bc->back);
+	}
+}
+
+/*
+ * Signal to the parser that an empty branch has been encountered; this will,
+ * in the future, be used to allow for more permissive behavior with empty
+ * branches. The return value should indicate whether parsing may continue
+ * or not.
+ */
+static bool
+p_branch_empty(struct parse *p, struct branchc *bc)
+{
+
+	(void)bc;
+	SETERROR(REG_EMPTY);
+	return (false);
+}
+
+/*
+ * Take care of any branching requirements. This includes inserting the
+ * appropriate branching instructions as well as eating all of the branch
+ * delimiters until we either run out of pattern or need to parse more pattern.
+ */
+static bool
+p_branch_do(struct parse *p, struct branchc *bc)
+{
+	int ate = 0;
+
+	ate = p_branch_eat_delim(p, bc);
+	if (ate == 0)
+		return (false);
+	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
+		/*
+		 * Halt parsing only if we have an empty branch and p_branch_empty
+		 * indicates that we must not continue. In the future, this will not
+		 * necessarily be an error.
+		 */
+		return (false);
+	p_branch_ins_offset(p, bc);
+
+	return (true);
+}
+
+static void
+p_bre_pre_parse(struct parse *p, struct branchc *bc)
+{
+
+	(void)bc;
+	/*
+	 * Does not move cleanly into expression parser because of
+	 * ordinary interpretation of * at the beginning position of
+	 * an expression.
+	 */
 	if (EAT('^')) {
 		EMIT(OBOL, 0);
 		p->g->iflags |= USEBOL;
 		p->g->nbol++;
 	}
-	while (MORE() && !SEETWO(end1, end2)) {
-		wasdollar = p_simp_re(p, first, reclimit);
-		first = 0;
-	}
-	if (wasdollar) {	/* oops, that was a trailing anchor */
+}
+
+static void
+p_bre_post_parse(struct parse *p, struct branchc *bc)
+{
+
+	/* Expression is terminating due to EOL token */
+	if (bc->terminate) {
 		DROP(1);
 		EMIT(OEOL, 0);
 		p->g->iflags |= USEEOL;
 		p->g->neol++;
 	}
+}
 
-	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
+/*
+ - p_re - Top level parser, concatenation and BRE anchoring
+ == static void p_re(struct parse *p, int end1, int end2);
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_re(struct parse *p,
+	int end1,	/* first terminating character */
+	int end2)	/* second terminating character; ignored for EREs */
+{
+	struct branchc bc;
+
+	bc.nbranch = 0;
+	if (end1 == OUT && end2 == OUT)
+		bc.outer = true;
+	else
+		bc.outer = false;
+#define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
+	for (;;) {
+		bc.start = HERE();
+		bc.nchain = 0;
+		bc.terminate = false;
+		if (p->pre_parse != NULL)
+			p->pre_parse(p, &bc);
+		while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) {
+			bc.terminate = p->parse_expr(p, &bc);
+			++bc.nchain;
+		}
+		if (p->post_parse != NULL)
+			p->post_parse(p, &bc);
+		(void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY);
+#ifdef REGEX_GNU_EXTENSIONS
+		if (p->gnuext && HERE() == bc.start && !p_branch_empty(p, &bc))
+			break;
+#endif
+		if (!p->allowbranch)
+			break;
+		/*
+		 * p_branch_do's return value indicates whether we should
+		 * continue parsing or not. This is both for correctness and
+		 * a slight optimization, because it will check if we've
+		 * encountered an empty branch or the end of the string
+		 * immediately following a branch delimiter.
+		 */
+		if (!p_branch_do(p, &bc))
+			break;
+	}
+#undef SEEEND	/* must name the macro defined above so it stays local to p_re */
+	if (p->allowbranch)
+		p_branch_fix_tail(p, &bc);
+	assert(!MORE() || SEE(end1));
 }
 
 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
+ == static bool p_simp_re(struct parse *p, struct branchc *bc);
  */
-static int			/* was the simple RE an unbackslashed $? */
-p_simp_re(
-    struct parse *p,
-    int starordinary,		/* is a leading * an ordinary character? */
-    size_t reclimit)
+static bool			/* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p, struct branchc *bc)
 {
 	int c;
+	int cc;			/* convenient/control character */
 	int count;
 	int count2;
-	sopno pos, i;
+	sopno pos;
+	bool handled;
+	size_t i;
+	wint_t wc;
 	sopno subno;
 #	define	BACKSL	(1<<CHAR_BIT)
 
-	_DIAGASSERT(p != NULL);
-
-	pos = HERE();		/* repetion op, if any, covers from here */
+	pos = HERE();		/* repetition op, if any, covers from here */
+	handled = false;
 
 	assert(MORE());		/* caller should have ensured this */
 	c = GETNEXT();
 	if (c == '\\') {
-		REQUIRE(MORE(), REG_EESCAPE);
-		c = BACKSL | (unsigned char)GETNEXT();
-	}
-	switch (c) {
-	case '.':
-		if (p->g->cflags&REG_NEWLINE)
-			nonnewline(p);
-		else
-			EMIT(OANY, 0);
-		break;
-	case '[':
-		p_bracket(p);
-		break;
-	case BACKSL|'{':
-		SETERROR(REG_BADRPT);
-		break;
-	case BACKSL|'(':
-		p->g->nsub++;
-		subno = p->g->nsub;
-		if (subno < NPAREN)
-			p->pbegin[subno] = HERE();
-		EMIT(OLPAREN, subno);
-		/* the MORE here is an error heuristic */
-		if (MORE() && !SEETWO('\\', ')'))
-			p_bre(p, '\\', ')', reclimit);
-		if (subno < NPAREN) {
-			p->pend[subno] = HERE();
-			assert(p->pend[subno] != 0);
+		(void)REQUIRE(MORE(), REG_EESCAPE);
+		cc = GETNEXT();
+		c = BACKSL | cc;
+#ifdef REGEX_GNU_EXTENSIONS
+		if (p->gnuext) {
+			handled = true;
+			switch (c) {
+			case BACKSL|'`':
+				EMIT(OBOS, 0);
+				break;
+			case BACKSL|'\'':
+				EMIT(OEOS, 0);
+				break;
+			case BACKSL|'B':
+				EMIT(ONWBND, 0);
+				break;
+			case BACKSL|'b':
+				EMIT(OWBND, 0);
+				break;
+			case BACKSL|'W':
+			case BACKSL|'w':
+			case BACKSL|'S':
+			case BACKSL|'s':
+				p_b_pseudoclass(p, cc);
+				break;
+			case BACKSL|'a':
+				ordinary(p, '\a');
+				break;
+			case BACKSL|'e':
+				ordinary(p, '\e');
+				break;
+			case BACKSL|'f':
+				ordinary(p, '\f');
+				break;
+			case BACKSL|'n':
+				ordinary(p, '\n');
+				break;
+			case BACKSL|'r':
+				ordinary(p, '\r');
+				break;
+			case BACKSL|'t':
+				ordinary(p, '\t');
+				break;
+			case BACKSL|'v':
+				ordinary(p, '\v');
+				break;
+			default:
+				handled = false;
+			}
 		}
-		EMIT(ORPAREN, subno);
-		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
-		break;
-	case BACKSL|')':	/* should not get here -- must be user */
-	case BACKSL|'}':
-		SETERROR(REG_EPAREN);
-		break;
-	case BACKSL|'1':
-	case BACKSL|'2':
-	case BACKSL|'3':
-	case BACKSL|'4':
-	case BACKSL|'5':
-	case BACKSL|'6':
-	case BACKSL|'7':
-	case BACKSL|'8':
-	case BACKSL|'9':
-		i = (c&~BACKSL) - '0';
-		assert(i < NPAREN);
-		if (p->pend[i] != 0) {
-			assert(i <= p->g->nsub);
-			EMIT(OBACK_, i);
-			assert(p->pbegin[i] != 0);
-			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
-			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
-			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
-			EMIT(O_BACK, i);
-		} else
-			SETERROR(REG_ESUBREG);
-		p->g->backrefs = 1;
-		break;
-	case '*':
-		REQUIRE(starordinary, REG_BADRPT);
-		/* FALLTHROUGH */
-	default:
-		if (p->error != 0)
-			return(0);
-		ordinary(p, c &~ BACKSL);
-		break;
+#endif
+	}
+	if (!handled) {
+		switch (c) {
+		case '.':
+			if (p->g->cflags&REG_NEWLINE)
+				nonnewline(p);
+			else
+				EMIT(OANY, 0);
+			break;
+		case '[':
+			p_bracket(p);
+			break;
+		case BACKSL|'<':
+			EMIT(OBOW, 0);
+			break;
+		case BACKSL|'>':
+			EMIT(OEOW, 0);
+			break;
+		case BACKSL|'{':
+			SETERROR(REG_BADRPT);
+			break;
+		case BACKSL|'(':
+			p->g->nsub++;
+			subno = (sopno)p->g->nsub;
+			if (subno < NPAREN)
+				p->pbegin[subno] = HERE();
+			EMIT(OLPAREN, subno);
+			/* the MORE here is an error heuristic */
+			if (MORE() && !SEETWO('\\', ')'))
+				p_re(p, '\\', ')');
+			if (subno < NPAREN) {
+				p->pend[subno] = HERE();
+				assert(p->pend[subno] != 0);
+			}
+			EMIT(ORPAREN, subno);
+			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+			break;
+		case BACKSL|')':	/* should not get here -- must be user */
+			SETERROR(REG_EPAREN);
+			break;
+		case BACKSL|'1':
+		case BACKSL|'2':
+		case BACKSL|'3':
+		case BACKSL|'4':
+		case BACKSL|'5':
+		case BACKSL|'6':
+		case BACKSL|'7':
+		case BACKSL|'8':
+		case BACKSL|'9':
+			i = (c&~BACKSL) - '0';
+			assert(i < NPAREN);
+			if (p->pend[i] != 0) {
+				assert(i <= p->g->nsub);
+				EMIT(OBACK_, i);
+				assert(p->pbegin[i] != 0);
+				assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+				assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+				EMIT(O_BACK, i);
+			} else
+				SETERROR(REG_ESUBREG);
+			p->g->backrefs = 1;
+			break;
+		case '*':
+			/*
+			 * Ordinary if used as the first character beyond BOL anchor of
+			 * a (sub-)expression, counts as a bad repetition operator if it
+			 * appears otherwise.
+			 */
+			(void)REQUIRE(bc->nchain == 0, REG_BADRPT);
+			/* FALLTHROUGH */
+		default:
+			if (p->error != 0)
+				return (false);	/* Definitely not $... */
+			p->next--;
+			wc = WGETNEXT();
+			if ((c & BACKSL) == 0 || may_escape(p, wc))
+				ordinary(p, wc);
+			else
+				SETERROR(REG_EESCAPE);
+			break;
+		}
 	}
 
 	if (EAT('*')) {		/* implemented as +? */
@@ -706,27 +1043,35 @@
 		ASTERN(O_PLUS, pos);
 		INSERT(OQUEST_, pos);
 		ASTERN(O_QUEST, pos);
+#ifdef REGEX_GNU_EXTENSIONS
+	} else if (p->gnuext && EATTWO('\\', '?')) {
+		INSERT(OQUEST_, pos);
+		ASTERN(O_QUEST, pos);
+	} else if (p->gnuext && EATTWO('\\', '+')) {
+		INSERT(OPLUS_, pos);
+		ASTERN(O_PLUS, pos);
+#endif
 	} else if (EATTWO('\\', '{')) {
 		count = p_count(p);
 		if (EAT(',')) {
-			if (MORE() && isdigit((unsigned char)PEEK())) {
+			if (MORE() && isdigit((uch)PEEK())) {
 				count2 = p_count(p);
-				REQUIRE(count <= count2, REG_BADBR);
+				(void)REQUIRE(count <= count2, REG_BADBR);
 			} else		/* single number with comma */
 				count2 = INFINITY;
 		} else		/* just a single number */
 			count2 = count;
-		repeat(p, pos, count, count2, 0);
+		repeat(p, pos, count, count2);
 		if (!EATTWO('\\', '}')) {	/* error heuristics */
 			while (MORE() && !SEETWO('\\', '}'))
 				NEXT();
-			REQUIRE(MORE(), REG_EBRACE);
+			(void)REQUIRE(MORE(), REG_EBRACE);
 			SETERROR(REG_BADBR);
 		}
-	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
-		return(1);
+	} else if (c == '$')     /* $ (but not \$) ends it */
+		return (true);
 
-	return(0);
+	return (false);
 }
 
 /*
@@ -734,105 +1079,95 @@
  == static int p_count(struct parse *p);
  */
 static int			/* the value */
-p_count(
-    struct parse *p)
+p_count(struct parse *p)
 {
 	int count = 0;
 	int ndigits = 0;
 
-	_DIAGASSERT(p != NULL);
-
-	while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) {
+	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
 		count = count*10 + (GETNEXT() - '0');
 		ndigits++;
 	}
 
-	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
 	return(count);
 }
 
 /*
  - p_bracket - parse a bracketed character list
  == static void p_bracket(struct parse *p);
- *
- * Note a significant property of this code:  if the allocset() did SETERROR,
- * no set operations are done.
  */
 static void
-p_bracket(
-    struct parse *p)
+p_bracket(struct parse *p)
 {
 	cset *cs;
-	int invert = 0;
-	_DIAGASSERT(p != NULL);
-
-	cs = allocset(p);
-	if (cs == NULL)
-		return;
+	wint_t ch;
 
 	/* Dept of Truly Sickening Special-Case Kludges */
-	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
-					    (size_t)6) == 0) {
+	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
 		EMIT(OBOW, 0);
 		NEXTn(6);
 		return;
 	}
-	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]",
-					    (size_t)6) == 0) {
+	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
 		EMIT(OEOW, 0);
 		NEXTn(6);
 		return;
 	}
 
+	if ((cs = allocset(p)) == NULL)
+		return;
+
+	if (p->g->cflags&REG_ICASE)
+		cs->icase = 1;
 	if (EAT('^'))
-		invert++;	/* make note to invert set at end */
+		cs->invert = 1;
 	if (EAT(']'))
-		CHadd(cs, ']');
+		CHadd(p, cs, ']');
 	else if (EAT('-'))
-		CHadd(cs, '-');
+		CHadd(p, cs, '-');
 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
 		p_b_term(p, cs);
 	if (EAT('-'))
-		CHadd(cs, '-');
-	MUSTEAT(']', REG_EBRACK);
+		CHadd(p, cs, '-');
+	(void)MUSTEAT(']', REG_EBRACK);
 
 	if (p->error != 0)	/* don't mess things up further */
 		return;
 
-	if (p->g->cflags&REG_ICASE) {
-		ssize_t i;
-		int ci;
+	if (cs->invert && p->g->cflags&REG_NEWLINE)
+		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
 
-		for (i = p->g->csetsize - 1; i >= 0; i--)
-			if (CHIN(cs, i) && isalpha(i)) {
-				ci = othercase((int)i);
-				if (ci != i)
-					CHadd(cs, ci);
-			}
-		if (cs->multis != NULL)
-			mccase(p, cs);
-	}
-	if (invert) {
-		ssize_t i;
-
-		for (i = p->g->csetsize - 1; i >= 0; i--)
-			if (CHIN(cs, i))
-				CHsub(cs, (int)i);
-			else
-				CHadd(cs, (int)i);
-		if (p->g->cflags&REG_NEWLINE)
-			CHsub(cs, '\n');
-		if (cs->multis != NULL)
-			mcinvert(p, cs);
-	}
-
-	assert(cs->multis == NULL);		/* xxx */
-
-	if (nch(p, cs) == 1) {		/* optimize singleton sets */
-		ordinary(p, firstch(p, cs));
+	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
+		ordinary(p, ch);
 		freeset(p, cs);
 	} else
-		EMIT(OANYOF, freezeset(p, cs));
+		EMIT(OANYOF, (size_t)(cs - p->g->sets));
+}
+
+static int
+p_range_cmp(wchar_t c1, wchar_t c2)
+{
+#ifdef REGEX_LIBC_COLLATE
+	return __wcollate_range_cmp(c1, c2);
+#elif defined(NLS)
+	/* Copied from libc/collate __wcollate_range_cmp */
+	wchar_t s1[2], s2[2];
+
+	s1[0] = c1;
+	s1[1] = L'\0';
+	s2[0] = c2;
+	s2[1] = L'\0';
+	return wcscoll(s1, s2);
+#else
+	char s1[2], s2[2];
+
+	s1[0] = (char)c1;
+	s1[1] = '\0';
+	s2[0] = (char)c2;
+	s2[1] = '\0';
+	return strcoll(s1, s2);
+#endif
 }
 
 /*
@@ -840,13 +1175,15 @@
  == static void p_b_term(struct parse *p, cset *cs);
  */
 static void
-p_b_term(
-    struct parse *p,
-    cset *cs)
+p_b_term(struct parse *p, cset *cs)
 {
 	char c;
-	char start, finish;
-	int i;
+	wint_t start, finish;
+	wint_t i;
+#ifdef REGEX_LIBC_COLLATE
+	struct xlocale_collate *table =
+		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+#endif
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(cs != NULL);
@@ -856,11 +1193,9 @@
 	case '[':
 		c = (MORE2()) ? PEEK2() : '\0';
 		break;
-
 	case '-':
 		SETERROR(REG_ERANGE);
 		return;			/* NOTE RETURN */
-
 	default:
 		c = '\0';
 		break;
@@ -869,24 +1204,23 @@
 	switch (c) {
 	case ':':		/* character class */
 		NEXT2();
-		REQUIRE(MORE(), REG_EBRACK);
+		(void)REQUIRE(MORE(), REG_EBRACK);
 		c = PEEK();
-		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
 		p_b_cclass(p, cs);
-		REQUIRE(MORE(), REG_EBRACK);
-		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+		(void)REQUIRE(MORE(), REG_EBRACK);
+		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
 		break;
 	case '=':		/* equivalence class */
 		NEXT2();
-		REQUIRE(MORE(), REG_EBRACK);
+		(void)REQUIRE(MORE(), REG_EBRACK);
 		c = PEEK();
-		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
 		p_b_eclass(p, cs);
-		REQUIRE(MORE(), REG_EBRACK);
-		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+		(void)REQUIRE(MORE(), REG_EBRACK);
+		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
 		break;
 	default:		/* symbol, ordinary character, or range */
-/* xxx revision needed for multichar stuff */
 		start = p_b_symbol(p);
 		if (SEE('-') && MORE2() && PEEK2() != ']') {
 			/* range */
@@ -897,51 +1231,103 @@
 				finish = p_b_symbol(p);
 		} else
 			finish = start;
-/* xxx what about signed chars here... */
-		REQUIRE(start <= finish, REG_ERANGE);
-		for (i = start; i <= finish; i++)
-			CHadd(cs, i);
+		if (start == finish)
+			CHadd(p, cs, start);
+		else {
+#ifdef REGEX_LIBC_COLLATE
+			if (table->__collate_load_error || MB_CUR_MAX > 1) {
+#else
+			if (MB_CUR_MAX > 1) {
+#endif
+				(void)REQUIRE(start <= finish, REG_ERANGE);
+				CHaddrange(p, cs, start, finish);
+			} else {
+				(void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE);
+				for (i = 0; i <= UCHAR_MAX; i++) {
+					if (p_range_cmp(start, i) <= 0 &&
+					    p_range_cmp(i, finish) <= 0 )
+						CHadd(p, cs, i);
+				}
+			}
+		}
 		break;
 	}
 }
 
+#ifdef REGEX_GNU_EXTENSIONS
+/*
+ - p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S)
+ == static int p_b_pseudoclass(struct parse *p, char c)
+ */
+static int
+p_b_pseudoclass(struct parse *p, char c) {
+	cset *cs;
+
+	if ((cs = allocset(p)) == NULL)
+		return(0);
+
+	if (p->g->cflags&REG_ICASE)
+		cs->icase = 1;
+
+	switch (c) {
+	case 'W':
+		cs->invert = 1;
+		/* FALLTHROUGH */
+	case 'w':
+		p_b_cclass_named(p, cs, "alnum");
+		break;
+	case 'S':
+		cs->invert = 1;
+		/* FALLTHROUGH */
+	case 's':
+		p_b_cclass_named(p, cs, "space");
+		break;
+	default:
+		return(0);
+	}
+
+	EMIT(OANYOF, (size_t)(cs - p->g->sets));
+	return(1);
+}
+#endif
+
 /*
  - p_b_cclass - parse a character-class name and deal with it
  == static void p_b_cclass(struct parse *p, cset *cs);
  */
 static void
-p_b_cclass(
-    struct parse *p,
-    cset *cs)
+p_b_cclass(struct parse *p, cset *cs)
 {
-	const char *sp;
-	const struct cclass *cp;
+	const char *sp = p->next;
 	size_t len;
-	const char *u;
-	char c;
+	char clname[16];
 
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	sp = p->next;
-
-	while (MORE() && isalpha((unsigned char)PEEK()))
+	while (MORE() && isalpha((uch)PEEK()))
 		NEXT();
 	len = p->next - sp;
-	for (cp = cclasses; cp->name != NULL; cp++)
-		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
-			break;
-	if (cp->name == NULL) {
-		/* oops, didn't find it */
+	if (len >= sizeof(clname) - 1) {
 		SETERROR(REG_ECTYPE);
 		return;
 	}
+	memcpy(clname, sp, len);
+	clname[len] = '\0';
 
-	u = cp->chars;
-	while ((c = *u++) != '\0')
-		CHadd(cs, c);
-	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
-		MCadd(p, cs, u);
+	p_b_cclass_named(p, cs, clname);
+}
+
+/*
+ - p_b_cclass_named - deal with a named character class
+ == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
+ */
+static void
+p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) {
+	wctype_t wct;
+
+	if ((wct = wctype(clname)) == 0) {
+		SETERROR(REG_ECTYPE);
+		return;
+	}
+	CHaddtype(p, cs, wct);
 }
 
 /*
@@ -951,58 +1337,52 @@
  * This implementation is incomplete. xxx
  */
 static void
-p_b_eclass(
-    struct parse *p,
-    cset *cs)
+p_b_eclass(struct parse *p, cset *cs)
 {
-	char c;
+	wint_t c;
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(cs != NULL);
 
 	c = p_b_coll_elem(p, '=');
-	CHadd(cs, c);
+	CHadd(p, cs, c);
 }
 
 /*
  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
- == static char p_b_symbol(struct parse *p);
+ == static wint_t p_b_symbol(struct parse *p);
  */
-static char			/* value of symbol */
-p_b_symbol(
-    struct parse *p)
+static wint_t			/* value of symbol */
+p_b_symbol(struct parse *p)
 {
-	char value;
+	wint_t value;
 
 	_DIAGASSERT(p != NULL);
 
-	REQUIRE(MORE(), REG_EBRACK);
+	(void)REQUIRE(MORE(), REG_EBRACK);
 	if (!EATTWO('[', '.'))
-		return(GETNEXT());
+		return(WGETNEXT());
 
 	/* collating symbol */
 	value = p_b_coll_elem(p, '.');
-	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
 	return(value);
 }
 
 /*
  - p_b_coll_elem - parse a collating-element name and look it up
- == static char p_b_coll_elem(struct parse *p, int endc);
+ == static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
  */
-static char			/* value of collating element */
-p_b_coll_elem(
-    struct parse *p,
-    int endc)			/* name ended by endc,']' */
+static wint_t			/* value of collating element */
+p_b_coll_elem(struct parse *p,
+	wint_t endc)		/* name ended by endc,']' */
 {
-	const char *sp;
-	const struct cname *cp;
+	const char *sp = p->next;
+	struct cname *cp;
 	size_t len;
 
 	_DIAGASSERT(p != NULL);
 
-	sp = p->next;
-
 	while (MORE() && !SEETWO(endc, ']'))
 		NEXT();
 	if (!MORE()) {
@@ -1013,85 +1393,152 @@
 	for (cp = cnames; cp->name != NULL; cp++)
 		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
 			return(cp->code);	/* known name */
-	if (len == 1)
-		return(*sp);	/* single character */
-	SETERROR(REG_ECOLLATE);			/* neither */
+#ifdef NLS
+	mbstate_t mbs;
+	wchar_t wc;
+	size_t clen;
+
+	memset(&mbs, 0, sizeof(mbs));
+	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
+		return (wc);			/* single character */
+	else if (clen == (size_t)-1 || clen == (size_t)-2)
+		SETERROR(REG_ILLSEQ);
+	else
+		SETERROR(REG_ECOLLATE);		/* neither */
 	return(0);
+#else
+	if (len == 1)
+		return *sp;    /* single character */
+	SETERROR(REG_ECOLLATE);                 /* neither */
+	return 0;
+#endif
+}
+
+/*
+ - may_escape - determine whether 'ch' is escape-able in the current context
+ == static bool may_escape(struct parse *p, const wint_t ch)
+ */
+static bool
+may_escape(struct parse *p, const wint_t ch)
+{
+
+	if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
+		return (true);
+	if (isalpha(ch) || ch == '\'' || ch == '`')
+		return (false);
+	return (true);
+#ifdef NOTYET
+	/*
+	 * Build a whitelist of characters that may be escaped to produce an
+	 * ordinary in the current context. This assumes that these have not
+	 * been otherwise interpreted as a special character. Escaping an
+	 * ordinary character yields undefined results according to
+	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
+	 * advantage of this and use escaped ordinary characters to provide
+	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
+	 */
+	switch(ch) {
+	case '|':
+	case '+':
+	case '?':
+		/* The above characters may not be escaped in BREs */
+		if (!(p->g->cflags&REG_EXTENDED))
+			return (false);
+		/* Fallthrough */
+	case '(':
+	case ')':
+	case '{':
+	case '}':
+	case '.':
+	case '[':
+	case ']':
+	case '\\':
+	case '*':
+	case '^':
+	case '$':
+		return (true);
+	default:
+		return (false);
+	}
+#endif
 }
 
 /*
  - othercase - return the case counterpart of an alphabetic
- == static int othercase(int ch);
+ == static wint_t othercase(wint_t ch);
  */
-static int			/* if no counterpart, return ch */
-othercase(
-    int ch)
+static wint_t			/* if no counterpart, return ch */
+othercase(wint_t ch)
 {
-	assert(isalpha(ch));
-	if (isupper(ch))
-		return(tolower(ch));
-	else if (islower(ch))
-		return(toupper(ch));
+	assert(iswalpha(ch));
+	if (iswupper(ch))
+		return(towlower(ch));
+	else if (iswlower(ch))
+		return(towupper(ch));
 	else			/* peculiar, but could happen */
 		return(ch);
 }
 
 /*
  - bothcases - emit a dualcase version of a two-case character
- == static void bothcases(struct parse *p, int ch);
+ == static void bothcases(struct parse *p, wint_t ch);
  *
  * Boy, is this implementation ever a kludge...
  */
 static void
-bothcases(
-    struct parse *p,
-    int ch)
+bothcases(struct parse *p, wint_t ch)
 {
-	const char *oldnext;
-	const char *oldend;
-	char bracket[3];
+	const char *oldnext = p->next;
+	const char *oldend = p->end;
+	char bracket[3 + MB_LEN_MAX];
+	size_t n;
 
 	_DIAGASSERT(p != NULL);
 
-	oldnext = p->next;
-	oldend = p->end;
-
 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
 	p->next = bracket;
-	p->end = bracket+2;
-	bracket[0] = ch;
-	bracket[1] = ']';
-	bracket[2] = '\0';
+#ifdef NLS
+	mbstate_t mbs;
+	memset(&mbs, 0, sizeof(mbs));
+	n = wcrtomb(bracket, ch, &mbs);
+	assert(n != (size_t)-1);
+#else
+	n = 0;
+	bracket[n++] = ch;
+#endif
+	bracket[n] = ']';
+	bracket[n + 1] = '\0';
+	p->end = bracket+n+1;
 	p_bracket(p);
-	assert(p->next == bracket+2);
+	assert(p->next == p->end);
 	p->next = oldnext;
 	p->end = oldend;
 }
 
 /*
  - ordinary - emit an ordinary character
- == static void ordinary(struct parse *p, int ch);
+ == static void ordinary(struct parse *p, wint_t ch);
  */
 static void
-ordinary(
-    struct parse *p,
-    int ch)
+ordinary(struct parse *p, wint_t ch)
 {
-	cat_t *cap;
-	unsigned char uc = (unsigned char)ch;
+	cset *cs;
 
 	_DIAGASSERT(p != NULL);
 
-	cap = p->g->categories;
-	if ((p->g->cflags & REG_ICASE) && isalpha(uc) && othercase(uc) != uc)
-		bothcases(p, uc);
+	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
+		bothcases(p, ch);
+	else if ((wint_t)(ch & OPDMASK) == ch)
+		EMIT(OCHAR, (size_t)ch);
 	else {
-		EMIT(OCHAR, (sopno)uc);
-		if (cap[uc] == 0) {
-			_DIAGASSERT(__type_fit(unsigned char,
-			    p->g->ncategories + 1));
-			cap[uc] = (unsigned char)p->g->ncategories++;
-		}
+		/*
+		 * Kludge: character is too big to fit into an OCHAR operand.
+		 * Emit a singleton set.
+		 */
+		if ((cs = allocset(p)) == NULL)
+			return;
+		CHadd(p, cs, ch);
+		EMIT(OANYOF, (size_t)(cs - p->g->sets));
 	}
 }
 
@@ -1102,18 +1549,14 @@
  * Boy, is this implementation ever a kludge...
  */
 static void
-nonnewline(
-    struct parse *p)
+nonnewline(struct parse *p)
 {
-	const char *oldnext;
-	const char *oldend;
+	const char *oldnext = p->next;
+	const char *oldend = p->end;
 	char bracket[4];
 
 	_DIAGASSERT(p != NULL);
 
-	oldnext = p->next;
-	oldend = p->end;
-
 	p->next = bracket;
 	p->end = bracket+3;
 	bracket[0] = '^';
@@ -1128,18 +1571,15 @@
 
 /*
  - repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(struct parse *p, sopno start, int from, int to,
- == size_t reclimit);
+ == static void repeat(struct parse *p, sopno start, int from, int to);
  */
 static void
-repeat(
-    struct parse *p,
-    sopno start,		/* operand from here to end of strip */
-    int from,			/* repeated from this number */
-    int to,			/* to this number of times (maybe INFINITY) */
-    size_t reclimit)
+repeat(struct parse *p,
+	sopno start,		/* operand from here to end of strip */
+	int from,		/* repeated from this number */
+	int to)			/* to this number of times (maybe INFINITY) */
 {
-	sopno finish;
+	sopno finish = HERE();
 #	define	N	2
 #	define	INF	3
 #	define	REP(f, t)	((f)*8 + (t))
@@ -1148,13 +1588,9 @@
 
 	_DIAGASSERT(p != NULL);
 
-	if (reclimit++ > RECLIMIT) 
-		p->error = REG_ESPACE;
-	if (p->error)
+	if (p->error != 0)	/* head off possible runaway recursion */
 		return;
 
-	finish = HERE();
-
 	assert(from <= to);
 
 	switch (REP(MAP(from), MAP(to))) {
@@ -1166,7 +1602,7 @@
 	case REP(0, INF):		/* as x{1,}? */
 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
 		INSERT(OCH_, start);		/* offset is wrong... */
-		repeat(p, start+1, 1, to, reclimit);
+		repeat(p, start+1, 1, to);
 		ASTERN(OOR1, start);
 		AHEAD(start);			/* ... fix it */
 		EMIT(OOR2, 0);
@@ -1186,7 +1622,7 @@
 		ASTERN(O_CH, THERETHERE());
 		copy = dupl(p, start+1, finish+1);
 		assert(copy == finish+4);
-		repeat(p, copy, 1, to-1, reclimit);
+		repeat(p, copy, 1, to-1);
 		break;
 	case REP(1, INF):		/* as x+ */
 		INSERT(OPLUS_, start);
@@ -1194,11 +1630,11 @@
 		break;
 	case REP(N, N):			/* as xx{m-1,n-1} */
 		copy = dupl(p, start, finish);
-		repeat(p, copy, from-1, to-1, reclimit);
+		repeat(p, copy, from-1, to-1);
 		break;
 	case REP(N, INF):		/* as xx{n-1,INF} */
 		copy = dupl(p, start, finish);
-		repeat(p, copy, from-1, to, reclimit);
+		repeat(p, copy, from-1, to);
 		break;
 	default:			/* "can't happen" */
 		SETERROR(REG_ASSERT);	/* just in case */
@@ -1207,13 +1643,39 @@
 }
 
 /*
+ - wgetnext - helper function for WGETNEXT() macro. Gets the next wide
+ - character from the parse struct, signals a REG_ILLSEQ error if the
+ - character can't be converted. Returns the character (0 on error).
+ */
+static wint_t
+wgetnext(struct parse *p)
+{
+#ifdef NLS
+	mbstate_t mbs;
+	wchar_t wc;
+	size_t n;
+
+	memset(&mbs, 0, sizeof(mbs));
+	n = mbrtowc(&wc, p->next, (size_t)(p->end - p->next), &mbs);
+	if (n == (size_t)-1 || n == (size_t)-2) {
+		SETERROR(REG_ILLSEQ);
+		return (0);
+	}
+	if (n == 0)
+		n = 1;
+	p->next += n;
+	return wc;
+#else
+	return *p->next++;
+#endif
+}
+
+/*
  - seterr - set an error condition
  == static int seterr(struct parse *p, int e);
  */
 static int			/* useless but makes type checking happy */
-seterr(
-    struct parse *p,
-    int e)
+seterr(struct parse *p, int e)
 {
 
 	_DIAGASSERT(p != NULL);
@@ -1230,55 +1692,22 @@
  == static cset *allocset(struct parse *p);
  */
 static cset *
-allocset(
-    struct parse *p)
+allocset(struct parse *p)
 {
-	size_t no;
-	size_t nc;
-	size_t nbytes;
-	cset *cs;
-	size_t css;
-	size_t i;
-	void *old_ptr;
+	cset *cs, *ncs;
 
 	_DIAGASSERT(p != NULL);
 
-	no = p->g->ncsets++;
-	css = (size_t)p->g->csetsize;
-	if (no >= p->ncsalloc) {	/* need another column of space */
-		p->ncsalloc += CHAR_BIT;
-		nc = p->ncsalloc;
-		assert(nc % CHAR_BIT == 0);
-		nbytes = nc / CHAR_BIT * css;
-		if (MEMSIZE(p) > MEMLIMIT)
-			goto oomem;
-		if (reallocarr(&p->g->sets, nc, sizeof(cset)))
-			goto oomem;
-		old_ptr = p->g->setbits;
-		if (reallocarr(&p->g->setbits, nc / CHAR_BIT, css)) {
-			free(old_ptr);
-			goto oomem;
-		}
-		if (old_ptr != p->g->setbits) {
-			for (i = 0; i < no; i++)
-				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
-		}
-		(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+	ncs = reallocarray(p->g->sets, p->g->ncsets + 1, sizeof(*ncs));
+	if (ncs == NULL) {
+		SETERROR(REG_ESPACE);
+		return (NULL);
 	}
-
-	cs = &p->g->sets[no];
-	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
-	cs->mask = 1 << (unsigned int)((no) % CHAR_BIT);
-	cs->hash = 0;
-	cs->smultis = 0;
-	cs->multis = NULL;
+	p->g->sets = ncs;
+	cs = &p->g->sets[p->g->ncsets++];
+	memset(cs, 0, sizeof(*cs));
 
 	return(cs);
-
-oomem:
-	SETERROR(REG_ESPACE);
-	/* caller's responsibility not to do set ops */
-	return NULL;
 }
 
 /*
@@ -1286,353 +1715,128 @@
  == static void freeset(struct parse *p, cset *cs);
  */
 static void
-freeset(
-    struct parse *p,
-    cset *cs)
+freeset(struct parse *p, cset *cs)
 {
-	size_t i;
 	cset *top;
-	size_t css;
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(cs != NULL);
 
 	top = &p->g->sets[p->g->ncsets];
-	css = (size_t)p->g->csetsize;
 
-	for (i = 0; i < css; i++)
-		CHsub(cs, (int)i);
+	free(cs->wides);
+	free(cs->ranges);
+	free(cs->types);
+	memset(cs, 0, sizeof(*cs));
 	if (cs == top-1)	/* recover only the easy case */
 		p->g->ncsets--;
 }
 
 /*
- - freezeset - final processing on a set of characters
- == static int freezeset(struct parse *p, cset *cs);
- *
- * The main task here is merging identical sets.  This is usually a waste
- * of time (although the hash code minimizes the overhead), but can win
- * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
- * is done using addition rather than xor -- all ASCII [aA] sets xor to
- * the same value!
+ - singleton - Determine whether a set contains only one character,
+ - returning it if so, otherwise returning OUT.
  */
-static sopno			/* set number */
-freezeset(
-    struct parse *p,
-    cset *cs)
+static wint_t
+singleton(cset *cs)
 {
-	uch h;
-	size_t i;
-	cset *top;
-	cset *cs2;
-	size_t css;
+	wint_t i, s, n;
 
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	h = cs->hash;
-	top = &p->g->sets[p->g->ncsets];
-	css = (size_t)p->g->csetsize;
-
-	/* look for an earlier one which is the same */
-	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
-		if (cs2->hash == h && cs2 != cs) {
-			/* maybe */
-			for (i = 0; i < css; i++)
-				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
-					break;		/* no */
-			if (i == css)
-				break;			/* yes */
-		}
-
-	if (cs2 < top) {	/* found one */
-		freeset(p, cs);
-		cs = cs2;
-	}
-
-	return (sopno)(cs - p->g->sets);
-}
-
-/*
- - firstch - return first character in a set (which must have at least one)
- == static int firstch(struct parse *p, cset *cs);
- */
-static int			/* character; there is no "none" value */
-firstch(
-    struct parse *p,
-    cset *cs)
-{
-	size_t i;
-	size_t css;
-
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	css = (size_t)p->g->csetsize;
-
-	for (i = 0; i < css; i++)
-		if (CHIN(cs, i))
-			return((char)i);
-	assert(never);
-	return(0);		/* arbitrary */
-}
-
-/*
- - nch - number of characters in a set
- == static int nch(struct parse *p, cset *cs);
- */
-static int
-nch(
-    struct parse *p,
-    cset *cs)
-{
-	size_t i;
-	size_t css;
-	int n = 0;
-
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	css = (size_t)p->g->csetsize;
-
-	for (i = 0; i < css; i++)
-		if (CHIN(cs, i))
+	for (i = n = 0; i < NC; i++)
+		if (CHIN(cs, i)) {
 			n++;
-	return(n);
+			s = i;
+		}
+	if (n == 1)
+		return (s);
+	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
+	    cs->icase == 0)
+		return (cs->wides[0]);
+	/* Don't bother handling the other cases. */
+	return (OUT);
 }
 
 /*
- - mcadd - add a collating element to a cset
- == static void mcadd(struct parse *p, cset *cs, \
- ==	char *cp);
+ - CHadd - add character to character set.
  */
 static void
-mcadd(
-    struct parse *p,
-    cset *cs,
-    const char *cp)
+CHadd(struct parse *p, cset *cs, wint_t ch)
 {
-	size_t oldend;
+	wint_t nch, *newwides;
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(cs != NULL);
-	_DIAGASSERT(cp != NULL);
 
-	oldend = cs->smultis;
+	assert(ch >= 0);
+	if (ch < NC)
+		cs->bmp[(unsigned)ch >> 3] |= 1 << (ch & 7);
+	else {
+		newwides = reallocarray(cs->wides, cs->nwides + 1,
+		    sizeof(*cs->wides));
+		if (newwides == NULL) {
+			SETERROR(REG_ESPACE);
+			return;
+		}
+		cs->wides = newwides;
+		cs->wides[cs->nwides++] = ch;
+	}
+	if (cs->icase) {
+		if ((nch = towlower(ch)) < NC)
+			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
+		if ((nch = towupper(ch)) < NC)
+			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
+	}
+}
 
-	cs->smultis += strlen(cp) + 1;
-	if (cs->multis == NULL)
-		cs->multis = malloc(cs->smultis);
-	else
-		cs->multis = realloc(cs->multis, cs->smultis);
-	if (cs->multis == NULL) {
+/*
+ - CHaddrange - add all characters in the range [min,max] to a character set.
+ */
+static void
+CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
+{
+	crange *newranges;
+
+	_DIAGASSERT(p != NULL);
+	_DIAGASSERT(cs != NULL);
+
+	for (; min < NC && min <= max; min++)
+		CHadd(p, cs, min);
+	if (min >= max)
+		return;
+	newranges = reallocarray(cs->ranges, cs->nranges + 1,
+	    sizeof(*cs->ranges));
+	if (newranges == NULL) {
 		SETERROR(REG_ESPACE);
 		return;
 	}
-
-	(void) strcpy(cs->multis + oldend - 1, cp);
-	cs->multis[cs->smultis - 1] = '\0';
+	cs->ranges = newranges;
+	cs->ranges[cs->nranges].min = min;
+	cs->ranges[cs->nranges].max = max;
+	cs->nranges++;
 }
 
-#if 0
 /*
- - mcsub - subtract a collating element from a cset
- == static void mcsub(cset *cs, char *cp);
+ - CHaddtype - add all characters of a certain type to a character set.
  */
 static void
-mcsub(
-    cset *cs,
-    char *cp)
+CHaddtype(struct parse *p, cset *cs, wctype_t wct)
 {
-	char *fp;
-	size_t len;
+	wint_t i;
+	wctype_t *newtypes;
 
+	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(cs != NULL);
-	_DIAGASSERT(cp != NULL);
 
-	fp = mcfind(cs, cp);
-	len = strlen(fp);
-
-	assert(fp != NULL);
-	(void) memmove(fp, fp + len + 1,
-				cs->smultis - (fp + len + 1 - cs->multis));
-	cs->smultis -= len;
-
-	if (cs->smultis == 0) {
-		free(cs->multis);
-		cs->multis = NULL;
+	for (i = 0; i < NC; i++)
+		if (iswctype(i, wct))
+			CHadd(p, cs, i);
+	newtypes = reallocarray(cs->types, cs->ntypes + 1,
+	    sizeof(*cs->types));
+	if (newtypes == NULL) {
+		SETERROR(REG_ESPACE);
 		return;
 	}
-
-	cs->multis = realloc(cs->multis, cs->smultis);
-	assert(cs->multis != NULL);
-}
-
-/*
- - mcin - is a collating element in a cset?
- == static int mcin(cset *cs, char *cp);
- */
-static int
-mcin(
-    cset *cs,
-    char *cp)
-{
-
-	_DIAGASSERT(cs != NULL);
-	_DIAGASSERT(cp != NULL);
-
-	return(mcfind(cs, cp) != NULL);
-}
-
-/*
- - mcfind - find a collating element in a cset
- == static char *mcfind(cset *cs, char *cp);
- */
-static char *
-mcfind(
-    cset *cs,
-    char *cp)
-{
-	char *p;
-
-	_DIAGASSERT(cs != NULL);
-	_DIAGASSERT(cp != NULL);
-
-	if (cs->multis == NULL)
-		return(NULL);
-	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
-		if (strcmp(cp, p) == 0)
-			return(p);
-	return(NULL);
-}
-#endif
-
-/*
- - mcinvert - invert the list of collating elements in a cset
- == static void mcinvert(struct parse *p, cset *cs);
- *
- * This would have to know the set of possibilities.  Implementation
- * is deferred.
- */
-/* ARGSUSED */
-static void
-mcinvert(
-    struct parse *p,
-    cset *cs)
-{
-
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	assert(cs->multis == NULL);	/* xxx */
-}
-
-/*
- - mccase - add case counterparts of the list of collating elements in a cset
- == static void mccase(struct parse *p, cset *cs);
- *
- * This would have to know the set of possibilities.  Implementation
- * is deferred.
- */
-/* ARGSUSED */
-static void
-mccase(
-    struct parse *p,
-    cset *cs)
-{
-
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(cs != NULL);
-
-	assert(cs->multis == NULL);	/* xxx */
-}
-
-/*
- - isinsets - is this character in any sets?
- == static int isinsets(struct re_guts *g, int c);
- */
-static int			/* predicate */
-isinsets(
-    struct re_guts *g,
-    int c)
-{
-	uch *col;
-	size_t i;
-	size_t ncols;
-	unsigned uc = (unsigned char)c;
-
-	_DIAGASSERT(g != NULL);
-
-	if (g->setbits == NULL)
-		return 0;
-
-	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
-
-	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
-		if (col[uc] != 0)
-			return(1);
-	return(0);
-}
-
-/*
- - samesets - are these two characters in exactly the same sets?
- == static int samesets(struct re_guts *g, int c1, int c2);
- */
-static int			/* predicate */
-samesets(
-    struct re_guts *g,
-    int c1,
-    int c2)
-{
-	uch *col;
-	size_t i;
-	size_t ncols;
-	unsigned uc1 = (unsigned char)c1;
-	unsigned uc2 = (unsigned char)c2;
-
-	_DIAGASSERT(g != NULL);
-
-	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
-
-	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
-		if (col[uc1] != col[uc2])
-			return(0);
-	return(1);
-}
-
-/*
- - categorize - sort out character categories
- == static void categorize(struct parse *p, struct re_guts *g);
- */
-static void
-categorize(
-    struct parse *p,
-    struct re_guts *g)
-{
-	cat_t *cats;
-	int c;
-	int c2;
-	cat_t cat;
-
-	_DIAGASSERT(p != NULL);
-	_DIAGASSERT(g != NULL);
-
-	cats = g->categories;
-
-	/* avoid making error situations worse */
-	if (p->error != 0)
-		return;
-
-	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
-		if (cats[c] == 0 && isinsets(g, c)) {
-			_DIAGASSERT(__type_fit(unsigned char,
-			    g->ncategories + 1));
-			cat = g->ncategories++;
-			cats[c] = cat;
-			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
-				if (cats[c2] == 0 && samesets(g, c, c2))
-					cats[c2] = cat;
-		}
+	cs->types = newtypes;
+	cs->types[cs->ntypes++] = wct;
 }
 
 /*
@@ -1640,25 +1844,22 @@
  == static sopno dupl(struct parse *p, sopno start, sopno finish);
  */
 static sopno			/* start of duplicate */
-dupl(
-    struct parse *p,
-    sopno start,			/* from here */
-    sopno finish)			/* to this less one */
+dupl(struct parse *p,
+	sopno start,		/* from here */
+	sopno finish)		/* to this less one */
 {
-	sopno ret;
+	sopno ret = HERE();
 	sopno len = finish - start;
 
 	_DIAGASSERT(p != NULL);
 
-	ret = HERE();
-
 	assert(finish >= start);
 	if (len == 0)
 		return(ret);
-	if (!enlarge(p, p->ssize + len))/* this many unexpected additions */
-		return ret;
-	(void)memcpy(p->strip + p->slen, p->strip + start,
-	    (size_t)len * sizeof(sop));
+	if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
+		return(ret);
+	(void) memcpy(p->strip + p->slen,
+	    p->strip + start, len * sizeof(*p->strip));
 	p->slen += len;
 	return(ret);
 }
@@ -1672,17 +1873,14 @@
  * some changes to the data structures.  Maybe later.
  */
 static void
-doemit(
-    struct parse *p,
-    sop op,
-    sopno opnd)
+doemit(struct parse *p, sop op, size_t opnd)
 {
-	_DIAGASSERT(p != NULL);
-
 	/* avoid making error situations worse */
 	if (p->error != 0)
 		return;
 
+	_DIAGASSERT(p != NULL);
+
 	/* deal with oversize operands ("can't happen", more or less) */
 	assert(opnd < 1<<OPSHIFT);
 
@@ -1692,7 +1890,7 @@
 			return;
 
 	/* finally, it's all reduced to the easy case */
-	p->strip[p->slen++] = (sop)SOP(op, opnd);
+	p->strip[p->slen++] = (sopno)SOP(op, opnd);
 }
 
 /*
@@ -1700,11 +1898,7 @@
  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
  */
 static void
-doinsert(
-    struct parse *p,
-    sop op,
-    sopno opnd,
-    sopno pos)
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
 {
 	sopno sn;
 	sop s;
@@ -1732,7 +1926,8 @@
 		}
 	}
 
-	memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop));
+	memmove(&p->strip[pos+1], &p->strip[pos],
+	    (HERE()-pos-1)*sizeof(*p->strip));
 	p->strip[pos] = s;
 }
 
@@ -1741,10 +1936,7 @@
  == static void dofwd(struct parse *p, sopno pos, sop value);
  */
 static void
-dofwd(
-    struct parse *p,
-    sopno pos,
-    sopno value)
+dofwd(struct parse *p, sopno pos, sop value)
 {
 
 	_DIAGASSERT(p != NULL);
@@ -1754,25 +1946,29 @@
 		return;
 
 	assert(value < 1<<OPSHIFT);
-	p->strip[pos] = (sop)(OP(p->strip[pos]) | value);
+	p->strip[pos] = OP(p->strip[pos]) | value;
 }
 
 /*
  - enlarge - enlarge the strip
- == static void enlarge(struct parse *p, sopno size);
+ == static int enlarge(struct parse *p, sopno size);
  */
 static int
 enlarge(struct parse *p, sopno size)
 {
+	sop *sp;
+
 	_DIAGASSERT(p != NULL);
 
 	if (p->ssize >= size)
 		return 1;
 
-	if (MEMSIZE(p) > MEMLIMIT || reallocarr(&p->strip, size, sizeof(sop))) {
+	sp = reallocarray(p->strip, size, sizeof(*p->strip));
+	if (sp == NULL) {
 		SETERROR(REG_ESPACE);
 		return 0;
 	}
+	p->strip = sp;
 	p->ssize = size;
 	return 1;
 }
@@ -1782,18 +1978,18 @@
  == static void stripsnug(struct parse *p, struct re_guts *g);
  */
 static void
-stripsnug(
-    struct parse *p,
-    struct re_guts *g)
+stripsnug(struct parse *p, struct re_guts *g)
 {
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(g != NULL);
 
 	g->nstates = p->slen;
-	g->strip = p->strip;
-	reallocarr(&g->strip, p->slen, sizeof(sop));
-	/* Ignore error as tries to free memory only. */
+	g->strip = reallocarray(p->strip, p->slen, sizeof(*p->strip));
+	if (g->strip == NULL) {
+		SETERROR(REG_ESPACE);
+		g->strip = p->strip;
+	}
 }
 
 /*
@@ -1807,9 +2003,7 @@
  * Note that must and mlen got initialized during setup.
  */
 static void
-findmust(
-    struct parse *p,
-    struct re_guts *g)
+findmust(struct parse *p, struct re_guts *g)
 {
 	sop *scan;
 	sop *start = NULL;
@@ -1817,7 +2011,8 @@
 	sopno newlen;
 	sop s;
 	char *cp;
-	sopno i;
+	int offset;
+	mbstate_t mbs;
 
 	_DIAGASSERT(p != NULL);
 	_DIAGASSERT(g != NULL);
@@ -1826,16 +2021,39 @@
 	if (p->error != 0)
 		return;
 
+#ifdef notyet
+	/*
+	 * It's not generally safe to do a ``char'' substring search on
+	 * multibyte character strings, but it's safe for at least
+	 * UTF-8 (see RFC 3629).
+	 */
+	if (MB_CUR_MAX > 1 &&
+	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
+		return;
+#endif
+
 	/* find the longest OCHAR sequence in strip */
 	newlen = 0;
+	offset = 0;
+	g->moffset = 0;
 	scan = g->strip + 1;
 	do {
 		s = *scan++;
 		switch (OP(s)) {
 		case OCHAR:		/* sequence member */
-			if (newlen == 0)		/* new sequence */
+			if (newlen == 0) {		/* new sequence */
+				memset(&mbs, 0, sizeof(mbs));
 				newstart = scan - 1;
+			}
+#ifdef NLS
+			char buf[MB_LEN_MAX];
+			size_t clen = wcrtomb(buf, (int)OPND(s), &mbs);
+			if (clen == (size_t)-1)
+				goto toohard;
+			newlen += (sopno)clen;
+#else
 			newlen++;
+#endif
 			break;
 		case OPLUS_:		/* things that don't break one */
 		case OLPAREN:
@@ -1843,60 +2061,346 @@
 			break;
 		case OQUEST_:		/* things that must be skipped */
 		case OCH_:
+			offset = altoffset(scan, offset);
 			scan--;
 			do {
 				scan += OPND(s);
 				s = *scan;
 				/* assert() interferes w debug printouts */
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-							OP(s) != OOR2) {
+				if (OP(s) != O_QUEST &&
+				    OP(s) != O_CH && OP(s) != OOR2) {
 					g->iflags |= BAD;
 					return;
 				}
 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
 			/* FALLTHROUGH */
-		default:		/* things that break a sequence */
-			if (newlen > g->mlen) {		/* ends one */
+		case OBOW:		/* things that break a sequence */
+		case OEOW:
+		case OBOL:
+		case OEOL:
+		case OBOS:
+		case OEOS:
+		case OWBND:
+		case ONWBND:
+		case O_QUEST:
+		case O_CH:
+		case OEND:
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
+				if (offset > -1) {
+					g->moffset += offset;
+					offset = newlen;
+				} else
+					g->moffset = offset;
+			} else {
+				if (offset > -1)
+					offset += newlen;
 			}
 			newlen = 0;
 			break;
+		case OANY:
+			if (newlen > (sopno)g->mlen) {		/* ends one */
+				start = newstart;
+				g->mlen = newlen;
+				if (offset > -1) {
+					g->moffset += offset;
+					offset = newlen;
+				} else
+					g->moffset = offset;
+			} else {
+				if (offset > -1)
+					offset += newlen;
+			}
+			if (offset > -1)
+				offset++;
+			newlen = 0;
+			break;
+		case OANYOF:		/* may or may not invalidate offset */
+			/* First, everything as OANY */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
+				start = newstart;
+				g->mlen = newlen;
+				if (offset > -1) {
+					g->moffset += offset;
+					offset = newlen;
+				} else
+					g->moffset = offset;
+			} else {
+				if (offset > -1)
+					offset += newlen;
+			}
+			if (offset > -1)
+				offset++;
+			newlen = 0;
+			break;
+#ifdef NLS
+		toohard:/*FALLTHROUGH*/
+#endif
+		default:
+			/* Anything here makes it impossible or too hard
+			 * to calculate the offset -- so we give up;
+			 * save the last known good offset, in case the
+			 * must sequence doesn't occur later.
+			 */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
+				start = newstart;
+				g->mlen = newlen;
+				if (offset > -1)
+					g->moffset += offset;
+				else
+					g->moffset = offset;
+			}
+			offset = -1;
+			newlen = 0;
+			break;
 		}
 	} while (OP(s) != OEND);
 
-	if (start == NULL)
-		g->mlen = 0;
-
-	if (g->mlen == 0)	/* there isn't one */
+	if (g->mlen == 0) {		/* there isn't one */
+		g->moffset = -1;
 		return;
+	}
 
 	/* turn it into a character string */
 	g->must = malloc((size_t)g->mlen + 1);
 	if (g->must == NULL) {		/* argh; just forget it */
 		g->mlen = 0;
+		g->moffset = -1;
 		return;
 	}
 	cp = g->must;
 	scan = start;
-	for (i = g->mlen; i > 0; i--) {
+	memset(&mbs, 0, sizeof(mbs));
+	while (cp < g->must + g->mlen) {
 		while (OP(s = *scan++) != OCHAR)
 			continue;
-		assert(cp < g->must + g->mlen);
-		*cp++ = (char)OPND(s);
+#ifdef NLS
+		size_t clen = wcrtomb(cp, (int)OPND(s), &mbs);
+		assert(clen != (size_t)-1);
+		cp += clen;
+#else
+		*cp++ = OPND(s);
+#endif
 	}
 	assert(cp == g->must + g->mlen);
 	*cp++ = '\0';		/* just on general principles */
 }
 
 /*
+ - altoffset - choose biggest offset among multiple choices
+ == static int altoffset(sop *scan, int offset);
+ *
+ * Compute, recursively if necessary, the largest offset among multiple
+ * re paths.
+ */
+static int
+altoffset(sop *scan, int offset)
+{
+	int largest;
+	int try;
+	sop s;
+
+	_DIAGASSERT(scan != NULL);
+
+	/* If we gave up already on offsets, return */
+	if (offset == -1)
+		return -1;
+
+	largest = 0;
+	try = 0;
+	s = *scan++;
+	while (OP(s) != O_QUEST && OP(s) != O_CH) {
+		switch (OP(s)) {
+		case OOR1:
+			if (try > largest)
+				largest = try;
+			try = 0;
+			break;
+		case OQUEST_:
+		case OCH_:
+			try = altoffset(scan, try);
+			if (try == -1)
+				return -1;
+			scan--;
+			do {
+				scan += OPND(s);
+				s = *scan;
+				if (OP(s) != O_QUEST &&
+				    OP(s) != O_CH && OP(s) != OOR2)
+					return -1;
+			} while (OP(s) != O_QUEST && OP(s) != O_CH);
+			/* We must skip to the next position, or we'll
+			 * leave altoffset() too early.
+			 */
+			scan++;
+			break;
+		case OANYOF:
+		case OCHAR:
+		case OANY:
+			try++;
+			/*FALLTHROUGH*/
+		case OBOW:
+		case OEOW:
+		case OWBND:
+		case ONWBND:
+		case OLPAREN:
+		case ORPAREN:
+		case OOR2:
+			break;
+		default:
+			try = -1;
+			break;
+		}
+		if (try == -1)
+			return -1;
+		s = *scan++;
+	}
+
+	if (try > largest)
+		largest = try;
+
+	return largest+offset;
+}
+
+/*
+ - computejumps - compute char jumps for BM scan
+ == static void computejumps(struct parse *p, struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and has size greater than
+ * zero. It's based on the algorithm found in Computer Algorithms by
+ * Sara Baase.
+ *
+ * A char jump is the number of characters one needs to jump based on
+ * the value of the character from the text that was mismatched.
+ */
+static void
+computejumps(struct parse *p, struct re_guts *g)
+{
+	int ch;
+	size_t mindex;
+
+	_DIAGASSERT(p != NULL);
+	_DIAGASSERT(g != NULL);
+
+	/* Avoid making errors worse */
+	if (p->error != 0)
+		return;
+
+	g->charjump = calloc((NC_MAX + 1), sizeof(*g->charjump));
+	if (g->charjump == NULL)	/* Not a fatal error */
+		return;
+	/* Adjust for signed chars, if necessary */
+	g->charjump = &g->charjump[-(CHAR_MIN)];
+
+	/* If the character does not exist in the pattern, the jump
+	 * is equal to the number of characters in the pattern.
+	 */
+	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
+		g->charjump[ch] = g->mlen;
+
+	/* If the character does exist, compute the jump that would
+	 * take us to the last character in the pattern equal to it
+	 * (notice that we match right to left, so that last character
+	 * is the first one that would be matched).
+	 */
+	for (mindex = 0; mindex < g->mlen; mindex++)
+		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
+}
+
+/*
+ - computematchjumps - compute match jumps for BM scan
+ == static void computematchjumps(struct parse *p, struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and has size greater than
+ * zero. It's based on the algorithm found in Computer Algorithms by
+ * Sara Baase.
+ *
+ * A match jump is the number of characters one needs to advance based
+ * on the already-matched suffix.
+ * Notice that all values here are minus (g->mlen-1), because of the way
+ * the search algorithm works.
+ */
+static void
+computematchjumps(struct parse *p, struct re_guts *g)
+{
+	size_t mindex;		/* General "must" iterator */
+	size_t suffix;		/* Keeps track of matching suffix */
+	size_t ssuffix;		/* Keeps track of suffixes' suffix */
+	size_t* pmatches;	/* pmatches[k] points to the next i
+				 * such that i+1...mlen is a substring
+				 * of k+1...k+mlen-i-1
+				 */
+
+	_DIAGASSERT(p != NULL);
+	_DIAGASSERT(g != NULL);
+
+	/* Avoid making errors worse */
+	if (p->error != 0)
+		return;
+
+	pmatches = calloc(g->mlen, sizeof(*pmatches));
+	if (pmatches == NULL) {
+		g->matchjump = NULL;
+		return;
+	}
+
+	g->matchjump = calloc(g->mlen, sizeof(*g->matchjump));
+	if (g->matchjump == NULL) {	/* Not a fatal error */
+		free(pmatches);
+		return;
+	}
+
+	/* Set maximum possible jump for each character in the pattern */
+	for (mindex = 0; mindex < g->mlen; mindex++)
+		g->matchjump[mindex] = 2 * g->mlen - mindex - 1;
+
+	/* Compute pmatches[] */
+	for (suffix = mindex = g->mlen; mindex-- > 0; suffix--) {
+		pmatches[mindex] = suffix;
+
+		/* If a mismatch is found, interrupting the substring,
+		 * compute the matchjump for that position. If no
+		 * mismatch is found, then a text substring mismatched
+		 * against the suffix will also mismatch against the
+		 * substring.
+		 */
+		while (suffix < g->mlen
+		    && g->must[mindex] != g->must[suffix]) {
+			g->matchjump[suffix] = MIN(g->matchjump[suffix],
+			    g->mlen - mindex - 1);
+			suffix = pmatches[suffix];
+		}
+	}
+
+	/* Compute the matchjump up to the last substring found to jump
+	 * to the beginning of the largest must pattern prefix matching
+	 * its own suffix.
+	 */
+	for (mindex = 0; mindex <= suffix; mindex++)
+		g->matchjump[mindex] = MIN(g->matchjump[mindex],
+		    g->mlen + suffix - mindex);
+
+        ssuffix = pmatches[suffix];
+        while (suffix < g->mlen) {
+                while (suffix <= ssuffix && suffix < g->mlen) {
+                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
+			    g->mlen + ssuffix - suffix);
+                        suffix++;
+                }
+		if (suffix < g->mlen)
+                	ssuffix = pmatches[ssuffix];
+        }
+
+	free(pmatches);
+}
+
+/*
  - pluscount - count + nesting
  == static sopno pluscount(struct parse *p, struct re_guts *g);
  */
 static sopno			/* nesting depth */
-pluscount(
-    struct parse *p,
-    struct re_guts *g)
+pluscount(struct parse *p, struct re_guts *g)
 {
 	sop *scan;
 	sop s;

diff --git a/libc/upstream-netbsd/lib/libc/regex/regerror.c b/libc/upstream-netbsd/lib/libc/regex/regerror.c
index e00d7c0..cfd7704 100644
--- a/libc/upstream-netbsd/lib/libc/regex/regerror.c
+++ b/libc/upstream-netbsd/lib/libc/regex/regerror.c

@@ -1,6 +1,9 @@
-/*	$NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $	*/
+/*	$NetBSD: regerror.c,v 1.26 2022/11/05 11:33:55 riastradh Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -34,76 +37,38 @@
  *	@(#)regerror.c	8.4 (Berkeley) 3/20/94
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regerror.c	8.4 (Berkeley) 3/20/94
- */
+#if HAVE_NBTOOL_CONFIG_H
+#include "nbtool_config.h"
+#endif
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)regerror.c	8.4 (Berkeley) 3/20/94";
-#else
-__RCSID("$NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $");
+__FBSDID("$FreeBSD: head/lib/libc/regex/regerror.c 326025 2017-11-20 19:49:47Z pfg $");
 #endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: regerror.c,v 1.26 2022/11/05 11:33:55 riastradh Exp $");
 
 #include "namespace.h"
 #include <sys/types.h>
-
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
+#include <limits.h>
+#include <stdlib.h>
 #include <regex.h>
 
+#include "utils.h"
+
 #ifdef __weak_alias
 __weak_alias(regerror,_regerror)
 #endif
 
-#include "utils.h"
-
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /* === regerror.c === */
-static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen);
+static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen);
 
 #ifdef __cplusplus
 }
@@ -126,6 +91,8 @@
  = #define	REG_EMPTY	14
  = #define	REG_ASSERT	15
  = #define	REG_INVARG	16
+ = #define	REG_ENOSYS	17
+ = #define	REG_ILLSEQ	18
  = #define	REG_ATOI	255	// convert name to number (!)
  = #define	REG_ITOA	0400	// convert number to name (!)
  */
@@ -134,36 +101,36 @@
 	const char *name;
 	const char *explain;
 } rerrs[] = {
-	{ REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match" },
-	{ REG_BADPAT,	"REG_BADPAT",	"invalid regular expression" },
-	{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
-	{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
-	{ REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)" },
-	{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
-	{ REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced" },
-	{ REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced" },
-	{ REG_EBRACE,	"REG_EBRACE",	"braces not balanced" },
-	{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
-	{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
-	{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
-	{ REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid" },
-	{ REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression" },
-	{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
-	{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine" },
-	{ 0,		"",		"*** unknown regexp error code ***" }
+	{REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match"},
+	{REG_BADPAT,	"REG_BADPAT",	"invalid regular expression"},
+	{REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element"},
+	{REG_ECTYPE,	"REG_ECTYPE",	"invalid character class"},
+	{REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)"},
+	{REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number"},
+	{REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced"},
+	{REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced"},
+	{REG_EBRACE,	"REG_EBRACE",	"braces not balanced"},
+	{REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)"},
+	{REG_ERANGE,	"REG_ERANGE",	"invalid character range"},
+	{REG_ESPACE,	"REG_ESPACE",	"out of memory"},
+	{REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid"},
+	{REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression"},
+	{REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug"},
+	{REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine"},
+	{REG_ILLSEQ,	"REG_ILLSEQ",	"illegal byte sequence"},
+	{0,		"",		"*** unknown regexp error code ***"}
 };
 
 /*
- * regerror - the interface to error numbers
- * extern size_t regerror(int, const regex_t *, char *, size_t);
+ - regerror - the interface to error numbers
+ = extern size_t regerror(int, const regex_t *, char *, size_t);
  */
 /* ARGSUSED */
 size_t
-regerror(
-    int errcode,
-    const regex_t *preg,
-    char *errbuf,
-    size_t errbuf_size)
+regerror(int errcode,
+	 const regex_t * __restrict preg,
+	 char * __restrict errbuf,
+	 size_t errbuf_size)
 {
 	const struct rerr *r;
 	size_t len;
@@ -172,21 +139,20 @@
 	char convbuf[50];
 
 	_DIAGASSERT(errcode != REG_ATOI || preg != NULL);
-	_DIAGASSERT(errbuf != NULL);
+	_DIAGASSERT(errbuf_size == 0 || errbuf != NULL);
 
-	if (errcode == REG_ATOI)
+	if (errcode == REG_ATOI) {
 		s = regatoi(preg, convbuf, sizeof convbuf);
-	else {
+	} else {
 		for (r = rerrs; r->code != 0; r++)
 			if (r->code == target)
 				break;
-	
-		if (errcode & REG_ITOA) {
-			if (r->code != 0) {
-				(void)strlcpy(convbuf, r->name, sizeof convbuf);
-			} else
-				(void)snprintf(convbuf, sizeof convbuf,
-				    "REG_0x%x", target);
+
+		if (errcode&REG_ITOA) {
+			if (r->code != 0)
+				(void) strlcpy(convbuf, r->name, sizeof(convbuf));
+			else
+				snprintf(convbuf, sizeof(convbuf), "REG_0x%x", target);
 			s = convbuf;
 		} else
 			s = r->explain;
@@ -194,21 +160,17 @@
 
 	len = strlen(s) + 1;
 	if (errbuf_size > 0)
-		(void)strlcpy(errbuf, s, errbuf_size);
+		(void) strlcpy(errbuf, s, errbuf_size);
 
 	return(len);
 }
 
 /*
- * regatoi - internal routine to implement REG_ATOI
- * static const char *regatoi(const regex_t *preg, char *localbuf,
- * size_t buflen);
+ - regatoi - internal routine to implement REG_ATOI
+ == static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen);
  */
 static const char *
-regatoi(
-    const regex_t *preg,
-    char *localbuf,
-    size_t buflen)
+regatoi(const regex_t *preg, char *localbuf, size_t buflen)
 {
 	const struct rerr *r;
 
@@ -218,6 +180,6 @@
 	if (r->code == 0)
 		return "0";
 
-	(void)snprintf(localbuf, buflen, "%d", r->code);
+	snprintf(localbuf, buflen, "%d", r->code);
 	return localbuf;
 }

diff --git a/libc/upstream-netbsd/lib/libc/regex/regex2.h b/libc/upstream-netbsd/lib/libc/regex/regex2.h
index 7c877ee..fbfff0d 100644
--- a/libc/upstream-netbsd/lib/libc/regex/regex2.h
+++ b/libc/upstream-netbsd/lib/libc/regex/regex2.h

@@ -1,6 +1,9 @@
-/*	$NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $	*/
+/*	$NetBSD: regex2.h,v 1.15 2021/02/24 18:13:21 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,43 +35,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)regex2.h	8.4 (Berkeley) 3/20/94
- */
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regex2.h	8.4 (Berkeley) 3/20/94
+ * $FreeBSD: head/lib/libc/regex/regex2.h 368359 2020-12-05 03:18:48Z kevans $
  */
 
 /*
@@ -109,68 +76,100 @@
  * In state representations, an operator's bit is on to signify a state
  * immediately *preceding* "execution" of that operator.
  */
-typedef u_int32_t sop;	/* strip operator */
-typedef size_t sopno;
-#define	OPRMASK	((u_int32_t)0xf8000000UL)
-#define	OPDMASK	((u_int32_t)0x07ffffffUL)
-#define	OPSHIFT	((unsigned)27)
+typedef uint32_t sop;	/* strip operator */
+typedef uint32_t sopno;
+#define	OPRMASK	0xf8000000U
+#define	OPDMASK	0x07ffffffU
+#define	OPSHIFT	(27U)
 #define	OP(n)	((n)&OPRMASK)
-#define	OPND(n)	((int)((n)&OPDMASK))
+#define	OPND(n)	((n)&OPDMASK)
 #define	SOP(op, opnd)	((op)|(opnd))
-
-#define OPC(n)	(((u_int32_t)(n))<<OPSHIFT)
-/* operators		   meaning	operand			*/
-/*					(back, fwd are offsets)	*/
-#define	OEND	OPC(1)	/* endmarker	-			*/
-#define	OCHAR	OPC(2)	/* character	unsigned char		*/
-#define	OBOL	OPC(3)	/* left anchor	-			*/
-#define	OEOL	OPC(4)	/* right anchor	-			*/
-#define	OANY	OPC(5)	/* .		-			*/
-#define	OANYOF	OPC(6)	/* [...]	set number		*/
-#define	OBACK_	OPC(7)	/* begin \d	paren number		*/
-#define	O_BACK	OPC(8)	/* end \d	paren number		*/
-#define	OPLUS_	OPC(9)	/* + prefix	fwd to suffix		*/
-#define	O_PLUS	OPC(10)	/* + suffix	back to prefix		*/
-#define	OQUEST_	OPC(11)	/* ? prefix	fwd to suffix		*/
-#define	O_QUEST	OPC(12)	/* ? suffix	back to prefix		*/
-#define	OLPAREN	OPC(13)	/* (		fwd to )		*/
-#define	ORPAREN	OPC(14)	/* )		back to (		*/
-#define	OCH_	OPC(15)	/* begin choice	fwd to OOR2		*/
-#define	OOR1	OPC(16)	/* | pt. 1	back to OOR1 or OCH_	*/
-#define	OOR2	OPC(17)	/* | pt. 2	fwd to OOR2 or O_CH	*/
-#define	O_CH	OPC(18)	/* end choice	back to OOR1		*/
-#define	OBOW	OPC(19)	/* begin word	-			*/
-#define	OEOW	OPC(20)	/* end word	-			*/
+/* operators			   meaning	operand			*/
+/*						(back, fwd are offsets)	*/
+#define	OEND	(1U<<OPSHIFT)	/* endmarker	-			*/
+#define	OCHAR	(2U<<OPSHIFT)	/* character	wide character		*/
+#define	OBOL	(3U<<OPSHIFT)	/* left anchor	-			*/
+#define	OEOL	(4U<<OPSHIFT)	/* right anchor	-			*/
+#define	OANY	(5U<<OPSHIFT)	/* .		-			*/
+#define	OANYOF	(6U<<OPSHIFT)	/* [...]	set number		*/
+#define	OBACK_	(7U<<OPSHIFT)	/* begin \d	paren number		*/
+#define	O_BACK	(8U<<OPSHIFT)	/* end \d	paren number		*/
+#define	OPLUS_	(9U<<OPSHIFT)	/* + prefix	fwd to suffix		*/
+#define	O_PLUS	(10U<<OPSHIFT)	/* + suffix	back to prefix		*/
+#define	OQUEST_	(11U<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
+#define	O_QUEST	(12U<<OPSHIFT)	/* ? suffix	back to prefix		*/
+#define	OLPAREN	(13U<<OPSHIFT)	/* (		fwd to )		*/
+#define	ORPAREN	(14U<<OPSHIFT)	/* )		back to (		*/
+#define	OCH_	(15U<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
+#define	OOR1	(16U<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
+#define	OOR2	(17U<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
+#define	O_CH	(18U<<OPSHIFT)	/* end choice	back to OOR1		*/
+#define	OBOW	(19U<<OPSHIFT)	/* begin word	-			*/
+#define	OEOW	(20U<<OPSHIFT)	/* end word	-			*/
+#define	OBOS	(21U<<OPSHIFT)	/* begin subj.  -			*/
+#define	OEOS	(22U<<OPSHIFT)	/* end subj.	-			*/
+#define	OWBND	(23U<<OPSHIFT)	/* word bound	-			*/
+#define	ONWBND	(24U<<OPSHIFT)	/* not bound	-			*/
 
 /*
- * Structure for [] character-set representation.  Character sets are
- * done as bit vectors, grouped 8 to a byte vector for compactness.
- * The individual set therefore has both a pointer to the byte vector
- * and a mask to pick out the relevant bit of each byte.  A hash code
- * simplifies testing whether two sets could be identical.
- *
- * This will get trickier for multicharacter collating elements.  As
- * preliminary hooks for dealing with such things, we also carry along
- * a string of multi-character elements, and decide the size of the
- * vectors at run time.
+ * Structures for [] character-set representation.
  */
 typedef struct {
-	uch *ptr;		/* -> uch [csetsize] */
-	uch mask;		/* bit within array */
-	uch hash;		/* hash code */
-	size_t smultis;
-	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
+	wint_t		min;
+	wint_t		max;
+} crange;
+typedef struct {
+	unsigned char	bmp[NC_MAX / 8];
+	wctype_t	*types;
+	unsigned int	ntypes;
+	wint_t		*wides;
+	unsigned int	nwides;
+	crange		*ranges;
+	unsigned int	nranges;
+	int		invert;
+	int		icase;
 } cset;
-/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
-#define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
-#define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
-#define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
-#define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* regcomp() internal fns */
-#define	MCsub(p, cs, cp)	mcsub(p, cs, cp)
-#define	MCin(p, cs, cp)	mcin(p, cs, cp)
 
-/* stuff for character categories */
-typedef unsigned char cat_t;
+static int	/* is ch in cs?  hits and misses are flipped by cs->invert */
+CHIN1(cset *cs, wint_t ch)
+{
+	unsigned int i;
+
+	assert(ch >= 0);
+	if (ch < NC)	/* below NC: answer is the bmp[] bit XOR cs->invert */
+		return (((cs->bmp[(unsigned)ch >> 3] & (1 << (ch & 7))) != 0) ^
+		    cs->invert);
+	for (i = 0; i < cs->nwides; i++) {	/* scan the wides[] list */
+		if (cs->icase) {	/* icase: try both case mappings of the entry */
+			if (ch == towlower(cs->wides[i]) ||
+			    ch == towupper(cs->wides[i]))
+				return (!cs->invert);
+		} else if (ch == cs->wides[i])
+			return (!cs->invert);
+	}
+	for (i = 0; i < cs->nranges; i++)	/* inclusive [min, max] ranges */
+		if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
+			return (!cs->invert);
+	for (i = 0; i < cs->ntypes; i++)	/* character classes via iswctype() */
+		if (iswctype(ch, cs->types[i]))
+			return (!cs->invert);
+	return (cs->invert);	/* no bitmap, wide, range or type hit */
+}
+
+static __inline int	/* set membership: bitmap for ch < NC, else CHIN1() */
+CHIN(cset *cs, wint_t ch)
+{
+
+	assert(ch >= 0);
+	if (ch < NC)	/* same bmp[] test as the first branch of CHIN1() */
+		return (((cs->bmp[(unsigned)ch >> 3] & (1 << (ch & 7))) != 0) ^
+		    cs->invert);
+	else if (cs->icase)	/* icase: also try the lower/upper mappings of ch */
+		return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
+		    CHIN1(cs, towupper(ch)));
+	else
+		return (CHIN1(cs, ch));
+}
 
 /*
  * main compiled-expression structure
@@ -179,10 +178,8 @@
 	int magic;
 #		define	MAGIC2	((('R'^0200)<<8)|'E')
 	sop *strip;		/* malloced area for strip */
-	size_t csetsize;	/* number of bits in a cset vector */
 	size_t ncsets;		/* number of csets in use */
 	cset *sets;		/* -> cset [ncsets] */
-	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
 	int cflags;		/* copy of regcomp() cflags argument */
 	sopno nstates;		/* = number of sops */
 	sopno firststate;	/* the initial OEND (normally 0) */
@@ -193,17 +190,17 @@
 #		define	BAD	04	/* something wrong */
 	size_t nbol;		/* number of ^ used */
 	size_t neol;		/* number of $ used */
-	size_t ncategories;	/* how many character categories */
-	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
 	char *must;		/* match must contain this string */
+	int moffset;		/* latest point at which must may be located */
+	size_t *charjump;	/* Boyer-Moore char jump table */
+	size_t *matchjump;	/* Boyer-Moore match jump table */
 	size_t mlen;		/* length of must */
 	size_t nsub;		/* copy of re_nsub */
 	int backrefs;		/* does it use back references? */
 	sopno nplus;		/* how deep does it nest +s? */
-	/* catspace must be last */
-	cat_t catspace[1];	/* actually [NC] */
 };
 
 /* misc utilities */
-#define	OUT	(CHAR_MAX+1)	/* a non-character value */
-#define	ISWORD(c)	(isalnum((unsigned char)c) || (c) == '_')
+#define	OUT	(CHAR_MIN - 1)	/* a non-character value */
+#define	IGN	(CHAR_MIN - 2)
+#define ISWORD(c)       (iswalnum((uch)(c)) || (c) == '_')

diff --git a/libc/upstream-netbsd/lib/libc/regex/regexec.c b/libc/upstream-netbsd/lib/libc/regex/regexec.c
index f16e0b6..213a90b 100644
--- a/libc/upstream-netbsd/lib/libc/regex/regexec.c
+++ b/libc/upstream-netbsd/lib/libc/regex/regexec.c

@@ -1,6 +1,9 @@
-/*	$NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $	*/
+/*	$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -34,91 +37,96 @@
  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
- */
+#if HAVE_NBTOOL_CONFIG_H
+#include "nbtool_config.h"
+#endif
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
-#else
-__RCSID("$NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $");
+__FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $");
 #endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $");
 
 /*
  * the outer shell of regexec()
  *
- * This file includes engine.c *twice*, after muchos fiddling with the
+ * This file includes engine.c three times, after muchos fiddling with the
  * macros that code uses.  This lets the same code operate on two different
- * representations for state sets.
+ * representations for state sets and characters.
  */
-#include "namespace.h"
-#include <sys/types.h>
 
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
+#ifndef LIBHACK
+#include "namespace.h"
+#endif
+#include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
+#include <ctype.h>
 #include <regex.h>
 
-#ifdef __weak_alias
+#if defined(__weak_alias) && !defined(LIBHACK)
 __weak_alias(regexec,_regexec)
 #endif
 
 #include "utils.h"
 #include "regex2.h"
 
+static __inline size_t	/* bytes consumed from s; never less than 1 */
+xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
+{
+#ifdef NLS
+	size_t nr;
+	wchar_t wc;
+
+	nr = mbrtowc(&wc, s, n, mbs);
+	if (wi != NULL)
+		*wi = wc;
+	if (nr == 0)	/* mbrtowc() reported 0: still step over one byte */
+		return (1);
+	else if (nr == (size_t)-1 || nr == (size_t)-2) {	/* mbrtowc() failure codes */
+		memset(mbs, 0, sizeof(*mbs));	/* zero the conversion state */
+		if (wi != NULL)
+			*wi = dummy;	/* hand back the caller's placeholder */
+		return (1);
+	} else
+                return (nr);
+#else
+	if (wi)
+		*wi = (unsigned char)*s;	/* no sign extension; as xmbrtowc_dummy */
+	return 1;
+#endif
+}
+
+static __inline size_t	/* single-byte stand-in for xmbrtowc(); always returns 1 */
+xmbrtowc_dummy(wint_t *wi,
+		const char *s,
+		size_t n __unused,
+		mbstate_t *mbs __unused,
+		wint_t dummy __unused)
+{
+
+	if (wi != NULL)
+		*wi = (unsigned char)*s;	/* byte value taken as unsigned */
+	return (1);
+}
+
 /* macros for manipulating states, small version */
-#define	states	unsigned long
-#define	states1	unsigned long	/* for later use in regexec() decision */
+#define	states	long
+#define	states1	states		/* for later use in regexec() decision */
 #define	CLEAR(v)	((v) = 0)
 #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
 #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
 #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
 #define	ASSIGN(d, s)	((d) = (s))
 #define	EQ(a, b)	((a) == (b))
-#define	STATEVARS	int dummy	/* dummy version */
+#define	STATEVARS	long dummy	/* dummy version */
 #define	STATESETUP(m, n)	/* nothing */
 #define	STATETEARDOWN(m)	/* nothing */
 #define	SETUP(v)	((v) = 0)
-#define	onestate	unsigned long
+#define	onestate	long
 #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
 #define	INC(o)	((o) <<= 1)
 #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
@@ -127,6 +135,9 @@
 #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
 #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
 #define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
+/* no multibyte support */
+#define	XMBRTOWC	xmbrtowc_dummy
+#define	ZAPSTATE(mbs)	((void)(mbs))
 /* function names */
 #define SNAMES			/* engine.c looks after details */
 
@@ -152,26 +163,25 @@
 #undef	BACK
 #undef	ISSETBACK
 #undef	SNAMES
+#undef	XMBRTOWC
+#undef	ZAPSTATE
 
 /* macros for manipulating states, large version */
 #define	states	char *
-#define	CLEAR(v)	memset(v, 0, (size_t)m->g->nstates)
+#define	CLEAR(v)	memset(v, 0, m->g->nstates)
 #define	SET0(v, n)	((v)[n] = 0)
 #define	SET1(v, n)	((v)[n] = 1)
 #define	ISSET(v, n)	((v)[n])
-#define	ASSIGN(d, s)	memcpy(d, s, (size_t)m->g->nstates)
-#define	EQ(a, b)	(memcmp(a, b, (size_t)m->g->nstates) == 0)
-#define	STATEVARS	int vn; char *space
-#define	STATESETUP(m, nv) \
-    if (((m)->space = malloc((size_t)((nv)*(m)->g->nstates))) == NULL) \
-	return(REG_ESPACE); \
-    else \
-	(m)->vn = 0
-
-#define	STATETEARDOWN(m)	{ free((m)->space); m->space = NULL; }
-#define	SETUP(v)	((v) = &m->space[(size_t)(m->vn++ * m->g->nstates)])
-#define	onestate	int
-#define	INIT(o, n)	((o) = (int)(n))
+#define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
+#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
+#define	STATEVARS	long vn; char *space
+#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
+				if ((m)->space == NULL) return(REG_ESPACE); \
+				(m)->vn = 0; }
+#define	STATETEARDOWN(m)	{ free((m)->space); }
+#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
+#define	onestate	long
+#define	INIT(o, n)	((o) = (n))
 #define	INC(o)	((o)++)
 #define	ISSTATEIN(v, o)	((v)[o])
 /* some abbreviations; note that some of these know variable names! */
@@ -179,11 +189,24 @@
 #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
 #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
 #define	ISSETBACK(v, n)	((v)[here - (n)])
+/* no multibyte support */
+#define	XMBRTOWC	xmbrtowc_dummy
+#define	ZAPSTATE(mbs)	((void)(mbs))
 /* function names */
 #define	LNAMES			/* flag */
 
 #include "engine.c"
 
+/* multibyte character & large states version */
+#undef	LNAMES
+#undef	XMBRTOWC
+#undef	ZAPSTATE
+#define	XMBRTOWC	xmbrtowc
+#define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
+#define	MNAMES
+
+#include "engine.c"
+
 /*
  - regexec - interface for matching
  = extern int regexec(const regex_t *, const char *, size_t, \
@@ -200,21 +223,18 @@
  * have been prototyped.
  */
 int				/* 0 success, REG_NOMATCH failure */
-regexec(
-    const regex_t *preg,
-    const char *string,
-    size_t nmatch,
-    regmatch_t pmatch[],
-    int eflags)
+regexec(const regex_t * __restrict preg,
+	const char * __restrict string,
+	size_t nmatch,
+	regmatch_t pmatch[__restrict],
+	int eflags)
 {
 	struct re_guts *g = preg->re_g;
-	char *s;
 #ifdef REDEBUG
 #	define	GOODFLAGS(f)	(f)
 #else
 #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
 #endif
-
 	_DIAGASSERT(preg != NULL);
 	_DIAGASSERT(string != NULL);
 
@@ -225,10 +245,10 @@
 		return(REG_BADPAT);
 	eflags = GOODFLAGS(eflags);
 
-	s = __UNCONST(string);
-
-	if (g->nstates <= (sopno)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
-		return(smatcher(g, s, nmatch, pmatch, eflags));
+	if (MB_CUR_MAX > 1)
+		return(mmatcher(g, string, nmatch, pmatch, eflags));
+	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
+		return(smatcher(g, string, nmatch, pmatch, eflags));
 	else
-		return(lmatcher(g, s, nmatch, pmatch, eflags));
+		return(lmatcher(g, string, nmatch, pmatch, eflags));
 }

diff --git a/libc/upstream-netbsd/lib/libc/regex/regfree.c b/libc/upstream-netbsd/lib/libc/regex/regfree.c
index ce011ea..7e388b1 100644
--- a/libc/upstream-netbsd/lib/libc/regex/regfree.c
+++ b/libc/upstream-netbsd/lib/libc/regex/regfree.c

@@ -1,6 +1,9 @@
-/*	$NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $	*/
+/*	$NetBSD: regfree.c,v 1.19 2021/02/26 19:24:47 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -34,58 +37,22 @@
  *	@(#)regfree.c	8.3 (Berkeley) 3/20/94
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regfree.c	8.3 (Berkeley) 3/20/94
- */
+#if HAVE_NBTOOL_CONFIG_H
+#include "nbtool_config.h"
+#endif
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)regfree.c	8.3 (Berkeley) 3/20/94";
-#else
-__RCSID("$NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $");
+__FBSDID("$FreeBSD: head/lib/libc/regex/regfree.c 326025 2017-11-20 19:49:47Z pfg $");
 #endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: regfree.c,v 1.19 2021/02/26 19:24:47 christos Exp $");
 
 #include "namespace.h"
 #include <sys/types.h>
-
-#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <limits.h>
 #include <regex.h>
 
 #ifdef __weak_alias
@@ -100,10 +67,10 @@
  = extern void regfree(regex_t *);
  */
 void
-regfree(
-    regex_t *preg)
+regfree(regex_t *preg)
 {
 	struct re_guts *g;
+	unsigned int i;
 
 	_DIAGASSERT(preg != NULL);
 
@@ -119,11 +86,19 @@
 
 	if (g->strip != NULL)
 		free(g->strip);
-	if (g->sets != NULL)
+	if (g->sets != NULL) {
+		for (i = 0; i < g->ncsets; i++) {
+			free(g->sets[i].ranges);
+			free(g->sets[i].wides);
+			free(g->sets[i].types);
+		}
 		free(g->sets);
-	if (g->setbits != NULL)
-		free(g->setbits);
+	}
 	if (g->must != NULL)
 		free(g->must);
+	if (g->charjump != NULL)
+		free(&g->charjump[CHAR_MIN]);
+	if (g->matchjump != NULL)
+		free(g->matchjump);
 	free(g);
 }

diff --git a/libc/upstream-netbsd/lib/libc/regex/utils.h b/libc/upstream-netbsd/lib/libc/regex/utils.h
index 762caee..972f555 100644
--- a/libc/upstream-netbsd/lib/libc/regex/utils.h
+++ b/libc/upstream-netbsd/lib/libc/regex/utils.h

@@ -1,6 +1,9 @@
-/*	$NetBSD: utils.h,v 1.6 2003/08/07 16:43:21 agc Exp $	*/
+/*	$NetBSD: utils.h,v 1.9 2021/04/22 19:20:24 christos Exp $	*/
 
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,49 +35,38 @@
  * SUCH DAMAGE.
  *
  *	@(#)utils.h	8.3 (Berkeley) 3/20/94
+ * $FreeBSD: head/lib/libc/regex/utils.h 341838 2018-12-12 04:23:00Z yuripv $
  */
 
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)utils.h	8.3 (Berkeley) 3/20/94
- */
+#ifdef NLS
+#include <wchar.h>
+#include <wctype.h>
+#else
+#include <ctype.h>
+#define wint_t regex_wint_t
+#define mbstate_t regex_mbstate_t
+#define wctype_t regex_wctype_t
+typedef short wint_t;
+typedef char mbstate_t;
+typedef short wctype_t;
+#define iswupper(a) isupper(a)
+#define iswlower(a) islower(a)
+#define iswalpha(a) isalpha(a)
+#define iswalnum(a) isalnum(a)
+#define towupper(a) toupper(a)
+#define towlower(a) tolower(a)
+extern wctype_t __regex_wctype(const char *);
+extern int __regex_iswctype(wint_t, wctype_t);
+#define wctype(s) __regex_wctype(s)
+#define iswctype(c, t) __regex_iswctype((c), (t))
+#endif
 
 /* utility definitions */
 #define	DUPMAX		_POSIX2_RE_DUP_MAX	/* xxx is this right? */
 #define	INFINITY	(DUPMAX + 1)
-#define	NC		(CHAR_MAX - CHAR_MIN + 1)
+
+#define	NC_MAX		(CHAR_MAX - CHAR_MIN + 1)
+#define	NC		((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
 typedef unsigned char uch;
 
 /* switch off assertions (if not already off) if no REDEBUG */

diff --git a/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c b/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c
index 2b0e0d8..e48fe85 100644
--- a/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c
+++ b/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c

@@ -1,4 +1,4 @@
-/*	$NetBSD: bsearch.c,v 1.15 2012/03/04 20:01:45 christos Exp $	*/
+/*	$NetBSD: bsearch.c,v 1.16 2022/05/31 08:43:14 andvar Exp $	*/
 
 /*
  * Copyright (c) 1990, 1993
@@ -34,7 +34,7 @@
 #if 0
 static char sccsid[] = "@(#)bsearch.c	8.1 (Berkeley) 6/4/93";
 #else
-__RCSID("$NetBSD: bsearch.c,v 1.15 2012/03/04 20:01:45 christos Exp $");
+__RCSID("$NetBSD: bsearch.c,v 1.16 2022/05/31 08:43:14 andvar Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 
@@ -50,7 +50,7 @@
  * is odd, moving left simply involves halving lim: e.g., when lim
  * is 5 we look at item 2, so we change lim to 2 so that we will
  * look at items 0 & 1.  If lim is even, the same applies.  If lim
- * is odd, moving right again involes halving lim, this time moving
+ * is odd, moving right again involves halving lim, this time moving
  * the base up one item past p: e.g., when lim is 5 we change base
  * to item 3 and make lim 2 so that we will look at items 3 and 4.
  * If lim is even, however, we have to shrink it by one before
commit	88e4e60a75841db476b765742ea61fc0403abbed	[log] [tgz]
author	Elliott Hughes <enh@google.com>	Tue Nov 08 02:51:00 2022 +0000
committer	Elliott Hughes <enh@google.com>	Tue Nov 08 02:51:48 2022 +0000
tree	2c9b6b03fc32cd32ee9f0d164c7e2124d33159ef
parent	09db6416bc061baf686fba2f50de7e96cb262785 [diff]