regcomp.c@ 3550

Last change on this file since 3550 was 3550, checked in by bird, 3 years ago
sed: Use get_crt_codepage() instead of fake nl_langinfo.
File size: 107.7 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <[email protected]>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	static reg_errcode_t re_compile_internal (regex_t preg, const char pattern,
22	size_t length, reg_syntax_t syntax);
23	static void re_compile_fastmap_iter (regex_t *bufp,
24	const re_dfastate_t *init_state,
25	char *fastmap);
26	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
27	#ifdef RE_ENABLE_I18N
28	static void free_charset (re_charset_t *cset);
29	#endif /* RE_ENABLE_I18N */
30	static void free_workarea_compile (regex_t *preg);
31	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
32	#ifdef RE_ENABLE_I18N
33	static void optimize_utf8 (re_dfa_t *dfa);
34	#endif
35	static reg_errcode_t analyze (regex_t *preg);
36	static reg_errcode_t preorder (bin_tree_t *root,
37	reg_errcode_t (fn (void , bin_tree_t )),
38	void *extra);
39	static reg_errcode_t postorder (bin_tree_t *root,
40	reg_errcode_t (fn (void , bin_tree_t )),
41	void *extra);
42	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
43	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
44	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
45	bin_tree_t *node);
46	static reg_errcode_t calc_first (void extra, bin_tree_t node);
47	static reg_errcode_t calc_next (void extra, bin_tree_t node);
48	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
49	static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
50	static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
51	unsigned int constraint);
52	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
53	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
54	int node, int root);
55	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
56	static int fetch_number (re_string_t input, re_token_t token,
57	reg_syntax_t syntax);
58	static int peek_token (re_token_t token, re_string_t input,
59	reg_syntax_t syntax) internal_function;
60	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
61	reg_syntax_t syntax, reg_errcode_t *err);
62	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
63	re_token_t *token, reg_syntax_t syntax,
64	int nest, reg_errcode_t *err);
65	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
66	re_token_t *token, reg_syntax_t syntax,
67	int nest, reg_errcode_t *err);
68	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
69	re_token_t *token, reg_syntax_t syntax,
70	int nest, reg_errcode_t *err);
71	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
72	re_token_t *token, reg_syntax_t syntax,
73	int nest, reg_errcode_t *err);
74	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
75	re_dfa_t dfa, re_token_t token,
76	reg_syntax_t syntax, reg_errcode_t *err);
77	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
78	re_token_t *token, reg_syntax_t syntax,
79	reg_errcode_t *err);
80	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
81	re_string_t *regexp,
82	re_token_t *token, int token_len,
83	re_dfa_t *dfa,
84	reg_syntax_t syntax,
85	int accept_hyphen);
86	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
87	re_string_t *regexp,
88	re_token_t *token);
89	#ifdef RE_ENABLE_I18N
90	static reg_errcode_t build_equiv_class (bitset_t sbcset,
91	re_charset_t *mbcset,
92	int *equiv_class_alloc,
93	const unsigned char *name);
94	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
95	bitset_t sbcset,
96	re_charset_t *mbcset,
97	int *char_class_alloc,
98	const unsigned char *class_name,
99	reg_syntax_t syntax);
100	#else /* not RE_ENABLE_I18N */
101	static reg_errcode_t build_equiv_class (bitset_t sbcset,
102	const unsigned char *name);
103	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
104	bitset_t sbcset,
105	const unsigned char *class_name,
106	reg_syntax_t syntax);
107	#endif /* not RE_ENABLE_I18N */
108	static bin_tree_t build_charclass_op (re_dfa_t dfa,
109	RE_TRANSLATE_TYPE trans,
110	const unsigned char *class_name,
111	const unsigned char *extra,
112	int non_match, reg_errcode_t *err);
113	static bin_tree_t create_tree (re_dfa_t dfa,
114	bin_tree_t left, bin_tree_t right,
115	re_token_type_t type);
116	static bin_tree_t create_token_tree (re_dfa_t dfa,
117	bin_tree_t left, bin_tree_t right,
118	const re_token_t *token);
119	static bin_tree_t duplicate_tree (const bin_tree_t src, re_dfa_t *dfa);
120	static void free_token (re_token_t *node);
121	static reg_errcode_t free_tree (void extra, bin_tree_t node);
122	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
123
124
125	/* This table gives an error message for each of the error codes listed
126	in regex.h. Obviously the order here has to be same as there.
127	POSIX doesn't require that we do anything for REG_NOERROR,
128	but why not be nice? */
129
130	const char __re_error_msgid[] attribute_hidden =
131	{
132	#define REG_NOERROR_IDX 0
133	gettext_noop ("Success") /* REG_NOERROR */
134	"\0"
135	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
136	gettext_noop ("No match") /* REG_NOMATCH */
137	"\0"
138	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
139	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
140	"\0"
141	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
142	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
143	"\0"
144	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
145	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
146	"\0"
147	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
148	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
149	"\0"
150	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
151	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
152	"\0"
153	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
154	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
155	"\0"
156	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
157	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
158	"\0"
159	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
160	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
161	"\0"
162	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
163	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
164	"\0"
165	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
166	gettext_noop ("Invalid range end") /* REG_ERANGE */
167	"\0"
168	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
169	gettext_noop ("Memory exhausted") /* REG_ESPACE */
170	"\0"
171	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
172	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
173	"\0"
174	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
175	gettext_noop ("Premature end of regular expression") /* REG_EEND */
176	"\0"
177	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
178	gettext_noop ("Regular expression too big") /* REG_ESIZE */
179	"\0"
180	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
181	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
182	};
183
184	const size_t __re_error_msgid_idx[] attribute_hidden =
185	{
186	REG_NOERROR_IDX,
187	REG_NOMATCH_IDX,
188	REG_BADPAT_IDX,
189	REG_ECOLLATE_IDX,
190	REG_ECTYPE_IDX,
191	REG_EESCAPE_IDX,
192	REG_ESUBREG_IDX,
193	REG_EBRACK_IDX,
194	REG_EPAREN_IDX,
195	REG_EBRACE_IDX,
196	REG_BADBR_IDX,
197	REG_ERANGE_IDX,
198	REG_ESPACE_IDX,
199	REG_BADRPT_IDX,
200	REG_EEND_IDX,
201	REG_ESIZE_IDX,
202	REG_ERPAREN_IDX
203	};
204
205
206	/* Entry points for GNU code. */
207
208	/* re_compile_pattern is the GNU regular expression compiler: it
209	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
210	Returns 0 if the pattern was valid, otherwise an error string.
211
212	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
213	are set in BUFP on entry. */
214
215	const char *
216	re_compile_pattern (pattern, length, bufp)
217	const char *pattern;
218	size_t length;
219	struct re_pattern_buffer *bufp;
220	{
221	reg_errcode_t ret;
222
223	/* And GNU code determines whether or not to get register information
224	by passing null for the REGS argument to re_match, etc., not by
225	setting no_sub, unless RE_NO_SUB is set. */
226	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
227
228	/* Match anchors at newline. */
229	bufp->newline_anchor = 1;
230
231	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
232
233	if (!ret)
234	return NULL;
235	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
236	}
237	#ifdef _LIBC
238	weak_alias (__re_compile_pattern, re_compile_pattern)
239	#endif
240
241	/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
242	also be assigned to arbitrarily: each pattern buffer stores its own
243	syntax, so it can be changed between regex compilations. */
244	/* This has no initializer because initialized variables in Emacs
245	become read-only after dumping. */
246	reg_syntax_t re_syntax_options;
247
248
249	/* Specify the precise syntax of regexps for compilation. This provides
250	for compatibility for various utilities which historically have
251	different, incompatible syntaxes.
252
253	The argument SYNTAX is a bit mask comprised of the various bits
254	defined in regex.h. We return the old syntax. */
255
256	reg_syntax_t
257	re_set_syntax (syntax)
258	reg_syntax_t syntax;
259	{
260	reg_syntax_t ret = re_syntax_options;
261
262	re_syntax_options = syntax;
263	return ret;
264	}
265	#ifdef _LIBC
266	weak_alias (__re_set_syntax, re_set_syntax)
267	#endif
268
269	int
270	re_compile_fastmap (bufp)
271	struct re_pattern_buffer *bufp;
272	{
273	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
274	char *fastmap = bufp->fastmap;
275
276	memset (fastmap, '\0', sizeof (char) * SBC_MAX);
277	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
278	if (dfa->init_state != dfa->init_state_word)
279	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
280	if (dfa->init_state != dfa->init_state_nl)
281	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
282	if (dfa->init_state != dfa->init_state_begbuf)
283	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
284	bufp->fastmap_accurate = 1;
285	return 0;
286	}
287	#ifdef _LIBC
288	weak_alias (__re_compile_fastmap, re_compile_fastmap)
289	#endif
290
291	static inline void
292	__attribute ((always_inline))
293	re_set_fastmap (char *fastmap, int icase, int ch)
294	{
295	fastmap[ch] = 1;
296	if (icase)
297	fastmap[tolower (ch)] = 1;
298	}
299
300	/* Helper function for re_compile_fastmap.
301	Compile fastmap for the initial_state INIT_STATE. */
302
303	static void
304	re_compile_fastmap_iter (regex_t bufp, const re_dfastate_t init_state,
305	char *fastmap)
306	{
307	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
308	int node_cnt;
309	int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
310	for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
311	{
312	int node = init_state->nodes.elems[node_cnt];
313	re_token_type_t type = dfa->nodes[node].type;
314
315	if (type == CHARACTER)
316	{
317	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
318	#ifdef RE_ENABLE_I18N
319	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
320	{
321	unsigned char buf = alloca (dfa->mb_cur_max), p;
322	wchar_t wc;
323	mbstate_t state;
324
325	p = buf;
326	*p++ = dfa->nodes[node].opr.c;
327	while (++node < dfa->nodes_len
328	&& dfa->nodes[node].type == CHARACTER
329	&& dfa->nodes[node].mb_partial)
330	*p++ = dfa->nodes[node].opr.c;
331	memset (&state, '\0', sizeof (state));
332	if (mbrtowc (&wc, (const char *) buf, p - buf,
333	&state) == p - buf
334	&& (__wcrtomb ((char *) buf, towlower (wc), &state)
335	!= (size_t) -1))
336	re_set_fastmap (fastmap, 0, buf[0]);
337	}
338	#endif
339	}
340	else if (type == SIMPLE_BRACKET)
341	{
342	int i, ch;
343	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
344	{
345	int j;
346	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
347	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
348	if (w & ((bitset_word_t) 1 << j))
349	re_set_fastmap (fastmap, icase, ch);
350	}
351	}
352	#ifdef RE_ENABLE_I18N
353	else if (type == COMPLEX_BRACKET)
354	{
355	int i;
356	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
357	if (cset->non_match \|\| cset->ncoll_syms \|\| cset->nequiv_classes
358	\|\| cset->nranges \|\| cset->nchar_classes)
359	{
360	# ifdef _LIBC
361	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
362	{
363	/* In this case we want to catch the bytes which are
364	the first byte of any collation elements.
365	e.g. In da_DK, we want to catch 'a' since "aa"
366	is a valid collation element, and don't catch
367	'b' since 'b' is the only collation element
368	which starts from 'b'. */
369	const int32_t table = (const int32_t )
370	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371	for (i = 0; i < SBC_MAX; ++i)
372	if (table[i] < 0)
373	re_set_fastmap (fastmap, icase, i);
374	}
375	# else
376	if (dfa->mb_cur_max > 1)
377	for (i = 0; i < SBC_MAX; ++i)
378	if (__btowc (i) == WEOF)
379	re_set_fastmap (fastmap, icase, i);
380	# endif /* not _LIBC */
381	}
382	for (i = 0; i < cset->nmbchars; ++i)
383	{
384	char buf[256];
385	mbstate_t state;
386	memset (&state, '\0', sizeof (state));
387	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
388	re_set_fastmap (fastmap, icase, (unsigned char ) buf);
389	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
390	{
391	if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
392	!= (size_t) -1)
393	re_set_fastmap (fastmap, 0, (unsigned char ) buf);
394	}
395	}
396	}
397	#endif /* RE_ENABLE_I18N */
398	else if (type == OP_PERIOD
399	#ifdef RE_ENABLE_I18N
400	\|\| type == OP_UTF8_PERIOD
401	#endif /* RE_ENABLE_I18N */
402	\|\| type == END_OF_RE)
403	{
404	memset (fastmap, '\1', sizeof (char) * SBC_MAX);
405	if (type == END_OF_RE)
406	bufp->can_be_null = 1;
407	return;
408	}
409	}
410	}
411
412
413	/* Entry point for POSIX code. */
414	/* regcomp takes a regular expression as a string and compiles it.
415
416	PREG is a regex_t *. We do not expect any fields to be initialized,
417	since POSIX says we shouldn't. Thus, we set
418
419	`buffer' to the compiled pattern;
420	`used' to the length of the compiled pattern;
421	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
422	REG_EXTENDED bit in CFLAGS is set; otherwise, to
423	RE_SYNTAX_POSIX_BASIC;
424	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
425	`fastmap' to an allocated space for the fastmap;
426	`fastmap_accurate' to zero;
427	`re_nsub' to the number of subexpressions in PATTERN.
428
429	PATTERN is the address of the pattern string.
430
431	CFLAGS is a series of bits which affect compilation.
432
433	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
434	use POSIX basic syntax.
435
436	If REG_NEWLINE is set, then . and [^...] don't match newline.
437	Also, regexec will try a match beginning after every newline.
438
439	If REG_ICASE is set, then we considers upper- and lowercase
440	versions of letters to be equivalent when matching.
441
442	If REG_NOSUB is set, then when PREG is passed to regexec, that
443	routine will report only success or failure, and nothing about the
444	registers.
445
446	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
447	the return codes and their meanings.) */
448
449	int
450	regcomp (preg, pattern, cflags)
451	regex_t *__restrict preg;
452	const char *__restrict pattern;
453	int cflags;
454	{
455	reg_errcode_t ret;
456	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
457	: RE_SYNTAX_POSIX_BASIC);
458
459	preg->buffer = NULL;
460	preg->allocated = 0;
461	preg->used = 0;
462
463	/* Try to allocate space for the fastmap. */
464	preg->fastmap = re_malloc (char, SBC_MAX);
465	if (BE (preg->fastmap == NULL, 0))
466	return REG_ESPACE;
467
468	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : 0;
469
470	/* If REG_NEWLINE is set, newlines are treated differently. */
471	if (cflags & REG_NEWLINE)
472	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
473	syntax &= ~RE_DOT_NEWLINE;
474	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
475	/* It also changes the matching behavior. */
476	preg->newline_anchor = 1;
477	}
478	else
479	preg->newline_anchor = 0;
480	preg->no_sub = !!(cflags & REG_NOSUB);
481	preg->translate = NULL;
482
483	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
484
485	/* POSIX doesn't distinguish between an unmatched open-group and an
486	unmatched close-group: both are REG_EPAREN. */
487	if (ret == REG_ERPAREN)
488	ret = REG_EPAREN;
489
490	/* We have already checked preg->fastmap != NULL. */
491	if (BE (ret == REG_NOERROR, 1))
492	/* Compute the fastmap now, since regexec cannot modify the pattern
493	buffer. This function never fails in this implementation. */
494	(void) re_compile_fastmap (preg);
495	else
496	{
497	/* Some error occurred while compiling the expression. */
498	re_free (preg->fastmap);
499	preg->fastmap = NULL;
500	}
501
502	return (int) ret;
503	}
504	#ifdef _LIBC
505	weak_alias (__regcomp, regcomp)
506	#endif
507
508	/* Returns a message corresponding to an error code, ERRCODE, returned
509	from either regcomp or regexec. We don't use PREG here. */
510
511	size_t
512	regerror (errcode, preg, errbuf, errbuf_size)
513	int errcode;
514	const regex_t *__restrict preg;
515	char *__restrict errbuf;
516	size_t errbuf_size;
517	{
518	const char *msg;
519	size_t msg_size;
520
521	if (BE (errcode < 0
522	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
523	/ sizeof (__re_error_msgid_idx[0])), 0))
524	/* Only error codes returned by the rest of the code should be passed
525	to this routine. If we are given anything else, or if other regex
526	code generates an invalid error code, then the program has a bug.
527	Dump core so we can fix it. */
528	abort ();
529
530	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
531
532	msg_size = strlen (msg) + 1; /* Includes the null. */
533
534	if (BE (errbuf_size != 0, 1))
535	{
536	if (BE (msg_size > errbuf_size, 0))
537	{
538	#if defined HAVE_MEMPCPY \|\| defined _LIBC
539	((char ) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
540	#else
541	memcpy (errbuf, msg, errbuf_size - 1);
542	errbuf[errbuf_size - 1] = 0;
543	#endif
544	}
545	else
546	memcpy (errbuf, msg, msg_size);
547	}
548
549	return msg_size;
550	}
551	#ifdef _LIBC
552	weak_alias (__regerror, regerror)
553	#endif
554
555
556	#ifdef RE_ENABLE_I18N
557	/* This static array is used for the map to single-byte characters when
558	UTF-8 is used. Otherwise we would allocate memory just to initialize
559	it the same all the time. UTF-8 is the preferred encoding so this is
560	a worthwhile optimization. */
561	static const bitset_t utf8_sb_map =
562	{
563	/* Set the first 128 bits. */
564	#ifdef _MSC_VER
565	BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 0, 0, 0, 0
566	#else
567	[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
568	#endif
569	};
570	#endif
571
572
573	static void
574	free_dfa_content (re_dfa_t *dfa)
575	{
576	int i, j;
577
578	if (dfa->nodes)
579	for (i = 0; i < dfa->nodes_len; ++i)
580	free_token (dfa->nodes + i);
581	re_free (dfa->nexts);
582	for (i = 0; i < dfa->nodes_len; ++i)
583	{
584	if (dfa->eclosures != NULL)
585	re_node_set_free (dfa->eclosures + i);
586	if (dfa->inveclosures != NULL)
587	re_node_set_free (dfa->inveclosures + i);
588	if (dfa->edests != NULL)
589	re_node_set_free (dfa->edests + i);
590	}
591	re_free (dfa->edests);
592	re_free (dfa->eclosures);
593	re_free (dfa->inveclosures);
594	re_free (dfa->nodes);
595
596	if (dfa->state_table)
597	for (i = 0; i <= dfa->state_hash_mask; ++i)
598	{
599	struct re_state_table_entry *entry = dfa->state_table + i;
600	for (j = 0; j < entry->num; ++j)
601	{
602	re_dfastate_t *state = entry->array[j];
603	free_state (state);
604	}
605	re_free (entry->array);
606	}
607	re_free (dfa->state_table);
608	#ifdef RE_ENABLE_I18N
609	if (dfa->sb_char != utf8_sb_map)
610	re_free (dfa->sb_char);
611	#endif
612	re_free (dfa->subexp_map);
613	#ifdef DEBUG
614	re_free (dfa->re_str);
615	#endif
616
617	re_free (dfa);
618	}
619
620
621	/* Free dynamically allocated space used by PREG. */
622
623	void
624	regfree (preg)
625	regex_t *preg;
626	{
627	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
628	if (BE (dfa != NULL, 1))
629	free_dfa_content (dfa);
630	preg->buffer = NULL;
631	preg->allocated = 0;
632
633	re_free (preg->fastmap);
634	preg->fastmap = NULL;
635
636	re_free (preg->translate);
637	preg->translate = NULL;
638	}
639	#ifdef _LIBC
640	weak_alias (__regfree, regfree)
641	#endif
642
643
644	/* Entry points compatible with 4.2 BSD regex library. We don't define
645	them unless specifically requested. */
646
647	#if defined _REGEX_RE_COMP \|\| defined _LIBC
648
649	/* BSD has one and only one pattern buffer. */
650	static struct re_pattern_buffer re_comp_buf;
651
652	char *
653	# ifdef _LIBC
654	/* Make these definitions weak in libc, so POSIX programs can redefine
655	these names if they don't use our functions, and still use
656	regcomp/regexec above without link errors. */
657	weak_function
658	# endif
659	re_comp (s)
660	const char *s;
661	{
662	reg_errcode_t ret;
663	char *fastmap;
664
665	if (!s)
666	{
667	if (!re_comp_buf.buffer)
668	return gettext ("No previous regular expression");
669	return 0;
670	}
671
672	if (re_comp_buf.buffer)
673	{
674	fastmap = re_comp_buf.fastmap;
675	re_comp_buf.fastmap = NULL;
676	__regfree (&re_comp_buf);
677	memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
678	re_comp_buf.fastmap = fastmap;
679	}
680
681	if (re_comp_buf.fastmap == NULL)
682	{
683	re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
684	if (re_comp_buf.fastmap == NULL)
685	return (char *) gettext (__re_error_msgid
686	+ __re_error_msgid_idx[(int) REG_ESPACE]);
687	}
688
689	/* Since `re_exec' always passes NULL for the `regs' argument, we
690	don't need to initialize the pattern buffer fields which affect it. */
691
692	/* Match anchors at newlines. */
693	re_comp_buf.newline_anchor = 1;
694
695	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
696
697	if (!ret)
698	return NULL;
699
700	/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
701	return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
702	}
703
704	#ifdef _LIBC
705	libc_freeres_fn (free_mem)
706	{
707	__regfree (&re_comp_buf);
708	}
709	#endif
710
711	#endif /* _REGEX_RE_COMP */
712
713
714	/* Internal entry point.
715	Compile the regular expression PATTERN, whose length is LENGTH.
716	SYNTAX indicate regular expression's syntax. */
717
718	static reg_errcode_t
719	re_compile_internal (regex_t preg, const char pattern, size_t length,
720	reg_syntax_t syntax)
721	{
722	reg_errcode_t err = REG_NOERROR;
723	re_dfa_t *dfa;
724	re_string_t regexp;
725
726	/* Initialize the pattern buffer. */
727	preg->fastmap_accurate = 0;
728	preg->syntax = syntax;
729	preg->not_bol = preg->not_eol = 0;
730	preg->used = 0;
731	preg->re_nsub = 0;
732	preg->can_be_null = 0;
733	preg->regs_allocated = REGS_UNALLOCATED;
734
735	/* Initialize the dfa. */
736	dfa = (re_dfa_t *) preg->buffer;
737	if (BE (preg->allocated < sizeof (re_dfa_t), 0))
738	{
739	/* If zero allocated, but buffer is non-null, try to realloc
740	enough space. This loses if buffer's address is bogus, but
741	that is the user's responsibility. If ->buffer is NULL this
742	is a simple allocation. */
743	dfa = re_realloc (preg->buffer, re_dfa_t, 1);
744	if (dfa == NULL)
745	return REG_ESPACE;
746	preg->allocated = sizeof (re_dfa_t);
747	preg->buffer = (unsigned char *) dfa;
748	}
749	preg->used = sizeof (re_dfa_t);
750
751	err = init_dfa (dfa, length);
752	if (BE (err != REG_NOERROR, 0))
753	{
754	free_dfa_content (dfa);
755	preg->buffer = NULL;
756	preg->allocated = 0;
757	return err;
758	}
759	#ifdef DEBUG
760	/* Note: length+1 will not overflow since it is checked in init_dfa. */
761	dfa->re_str = re_malloc (char, length + 1);
762	strncpy (dfa->re_str, pattern, length + 1);
763	#endif
764
765	__libc_lock_init (dfa->lock);
766
767	err = re_string_construct (&regexp, pattern, length, preg->translate,
768	syntax & RE_ICASE, dfa);
769	if (BE (err != REG_NOERROR, 0))
770	{
771	re_compile_internal_free_return:
772	free_workarea_compile (preg);
773	re_string_destruct (&regexp);
774	free_dfa_content (dfa);
775	preg->buffer = NULL;
776	preg->allocated = 0;
777	return err;
778	}
779
780	/* Parse the regular expression, and build a structure tree. */
781	preg->re_nsub = 0;
782	dfa->str_tree = parse (&regexp, preg, syntax, &err);
783	if (BE (dfa->str_tree == NULL, 0))
784	goto re_compile_internal_free_return;
785
786	/* Analyze the tree and create the nfa. */
787	err = analyze (preg);
788	if (BE (err != REG_NOERROR, 0))
789	goto re_compile_internal_free_return;
790
791	#ifdef RE_ENABLE_I18N
792	/* If possible, do searching in single byte encoding to speed things up. */
793	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
794	optimize_utf8 (dfa);
795	#endif
796
797	/* Then create the initial state of the dfa. */
798	err = create_initial_state (dfa);
799
800	/* Release work areas. */
801	free_workarea_compile (preg);
802	re_string_destruct (&regexp);
803
804	if (BE (err != REG_NOERROR, 0))
805	{
806	free_dfa_content (dfa);
807	preg->buffer = NULL;
808	preg->allocated = 0;
809	}
810
811	return err;
812	}
813
814	/* Initialize DFA. We use the length of the regular expression PAT_LEN
815	as the initial length of some arrays. */
816
817	static reg_errcode_t
818	init_dfa (re_dfa_t *dfa, size_t pat_len)
819	{
820	unsigned int table_size;
821	#ifndef _LIBC
822	char *codeset_name;
823	#endif
824
825	memset (dfa, '\0', sizeof (re_dfa_t));
826
827	/* Force allocation of str_tree_storage the first time. */
828	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
829
830	/* Avoid overflows. */
831	if (pat_len == SIZE_MAX)
832	return REG_ESPACE;
833
834	dfa->nodes_alloc = pat_len + 1;
835	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
836
837	/* table_size = 2 ^ ceil(log pat_len) */
838	for (table_size = 1; ; table_size <<= 1)
839	if (table_size > pat_len)
840	break;
841
842	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
843	dfa->state_hash_mask = table_size - 1;
844
845	dfa->mb_cur_max = MB_CUR_MAX;
846	#ifdef _LIBC
847	if (dfa->mb_cur_max == 6
848	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
849	dfa->is_utf8 = 1;
850	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
851	!= 0);
852	#else
853	# ifdef KBUILD_OS_WINDOWS
854	(void)codeset_name;
855	if (get_crt_codepage() == MY_CP_UTF8)
856	# else
857	# ifdef HAVE_LANGINFO_CODESET
858	codeset_name = nl_langinfo (CODESET);
859	# else
860	codeset_name = getenv ("LC_ALL");
861	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
862	codeset_name = getenv ("LC_CTYPE");
863	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
864	codeset_name = getenv ("LANG");
865	if (codeset_name == NULL)
866	codeset_name = "";
867	else if (strchr (codeset_name, '.') != NULL)
868	codeset_name = strchr (codeset_name, '.') + 1;
869	# endif
870
871	if (strcasecmp (codeset_name, "UTF-8") == 0
872	\|\| strcasecmp (codeset_name, "UTF8") == 0)
873	# endif
874	dfa->is_utf8 = 1;
875
876	/* We check exhaustively in the loop below if this charset is a
877	superset of ASCII. */
878	dfa->map_notascii = 0;
879	#endif
880
881	#ifdef RE_ENABLE_I18N
882	if (dfa->mb_cur_max > 1)
883	{
884	if (dfa->is_utf8)
885	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
886	else
887	{
888	int i, j, ch;
889
890	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
891	if (BE (dfa->sb_char == NULL, 0))
892	return REG_ESPACE;
893
894	/* Set the bits corresponding to single byte chars. */
895	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
896	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
897	{
898	wint_t wch = __btowc (ch);
899	if (wch != WEOF)
900	dfa->sb_char[i] \|= (bitset_word_t) 1 << j;
901	# ifndef _LIBC
902	if (isascii (ch) && wch != ch)
903	dfa->map_notascii = 1;
904	# endif
905	}
906	}
907	}
908	#endif
909
910	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, 0))
911	return REG_ESPACE;
912	return REG_NOERROR;
913	}
914
915	/* Initialize WORD_CHAR table, which indicate which character is
916	"word". In this case "word" means that it is the word construction
917	character used by some operators like "\<", "\>", etc. */
918
919	static void
920	internal_function
921	init_word_char (re_dfa_t *dfa)
922	{
923	int i, j, ch;
924	dfa->word_ops_used = 1;
925	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
926	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
927	if (isalnum (ch) \|\| ch == '_')
928	dfa->word_char[i] \|= (bitset_word_t) 1 << j;
929	}
930
931	/* Free the work area which are only used while compiling. */
932
933	static void
934	free_workarea_compile (regex_t *preg)
935	{
936	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
937	bin_tree_storage_t storage, next;
938	for (storage = dfa->str_tree_storage; storage; storage = next)
939	{
940	next = storage->next;
941	re_free (storage);
942	}
943	dfa->str_tree_storage = NULL;
944	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
945	dfa->str_tree = NULL;
946	re_free (dfa->org_indices);
947	dfa->org_indices = NULL;
948	}
949
950	/* Create initial states for all contexts. */
951
952	static reg_errcode_t
953	create_initial_state (re_dfa_t *dfa)
954	{
955	int first, i;
956	reg_errcode_t err;
957	re_node_set init_nodes;
958
959	/* Initial states have the epsilon closure of the node which is
960	the first node of the regular expression. */
961	first = dfa->str_tree->first->node_idx;
962	dfa->init_node = first;
963	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
964	if (BE (err != REG_NOERROR, 0))
965	return err;
966
967	/* The back-references which are in initial states can epsilon transit,
968	since in this case all of the subexpressions can be null.
969	Then we add epsilon closures of the nodes which are the next nodes of
970	the back-references. */
971	if (dfa->nbackref > 0)
972	for (i = 0; i < init_nodes.nelem; ++i)
973	{
974	int node_idx = init_nodes.elems[i];
975	re_token_type_t type = dfa->nodes[node_idx].type;
976
977	int clexp_idx;
978	if (type != OP_BACK_REF)
979	continue;
980	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
981	{
982	re_token_t *clexp_node;
983	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
984	if (clexp_node->type == OP_CLOSE_SUBEXP
985	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
986	break;
987	}
988	if (clexp_idx == init_nodes.nelem)
989	continue;
990
991	if (type == OP_BACK_REF)
992	{
993	int dest_idx = dfa->edests[node_idx].elems[0];
994	if (!re_node_set_contains (&init_nodes, dest_idx))
995	{
996	re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
997	i = 0;
998	}
999	}
1000	}
1001
1002	/* It must be the first time to invoke acquire_state. */
1003	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1004	/* We don't check ERR here, since the initial state must not be NULL. */
1005	if (BE (dfa->init_state == NULL, 0))
1006	return err;
1007	if (dfa->init_state->has_constraint)
1008	{
1009	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1010	CONTEXT_WORD);
1011	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1012	CONTEXT_NEWLINE);
1013	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1014	&init_nodes,
1015	CONTEXT_NEWLINE
1016	\| CONTEXT_BEGBUF);
1017	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
1018	\|\| dfa->init_state_begbuf == NULL, 0))
1019	return err;
1020	}
1021	else
1022	dfa->init_state_word = dfa->init_state_nl
1023	= dfa->init_state_begbuf = dfa->init_state;
1024
1025	re_node_set_free (&init_nodes);
1026	return REG_NOERROR;
1027	}
1028
1029
1030	#ifdef RE_ENABLE_I18N
1031	/* If it is possible to do searching in single byte encoding instead of UTF-8
1032	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1033	DFA nodes where needed. */
1034
1035	static void
1036	optimize_utf8 (re_dfa_t *dfa)
1037	{
1038	int node, i, mb_chars = 0, has_period = 0;
1039
1040	for (node = 0; node < dfa->nodes_len; ++node)
1041	switch (dfa->nodes[node].type)
1042	{
1043	case CHARACTER:
1044	if (dfa->nodes[node].opr.c >= 0x80)
1045	mb_chars = 1;
1046	break;
1047	case ANCHOR:
1048	switch (dfa->nodes[node].opr.idx)
1049	{
1050	case LINE_FIRST:
1051	case LINE_LAST:
1052	case BUF_FIRST:
1053	case BUF_LAST:
1054	break;
1055	default:
1056	/* Word anchors etc. cannot be handled. */
1057	return;
1058	}
1059	break;
1060	case OP_PERIOD:
1061	has_period = 1;
1062	break;
1063	case OP_BACK_REF:
1064	case OP_ALT:
1065	case END_OF_RE:
1066	case OP_DUP_ASTERISK:
1067	case OP_OPEN_SUBEXP:
1068	case OP_CLOSE_SUBEXP:
1069	break;
1070	case COMPLEX_BRACKET:
1071	return;
1072	case SIMPLE_BRACKET:
1073	/* Just double check. The non-ASCII range starts at 0x80. */
1074	assert (0x80 % BITSET_WORD_BITS == 0);
1075	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1076	if (dfa->nodes[node].opr.sbcset[i])
1077	return;
1078	break;
1079	default:
1080	abort ();
1081	}
1082
1083	if (mb_chars \|\| has_period)
1084	for (node = 0; node < dfa->nodes_len; ++node)
1085	{
1086	if (dfa->nodes[node].type == CHARACTER
1087	&& dfa->nodes[node].opr.c >= 0x80)
1088	dfa->nodes[node].mb_partial = 0;
1089	else if (dfa->nodes[node].type == OP_PERIOD)
1090	dfa->nodes[node].type = OP_UTF8_PERIOD;
1091	}
1092
1093	/* The search can be in single byte locale. */
1094	dfa->mb_cur_max = 1;
1095	dfa->is_utf8 = 0;
1096	dfa->has_mb_node = dfa->nbackref > 0 \|\| has_period;
1097	}
1098	#endif
1099
1100
1101	/* Analyze the structure tree, and calculate "first", "next", "edest",
1102	"eclosure", and "inveclosure". */
1103
1104	static reg_errcode_t
1105	analyze (regex_t *preg)
1106	{
1107	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1108	reg_errcode_t ret;
1109
1110	/* Allocate arrays. */
1111	dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1112	dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1113	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1114	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1115	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
1116	\|\| dfa->eclosures == NULL, 0))
1117	return REG_ESPACE;
1118
1119	dfa->subexp_map = re_malloc (int, preg->re_nsub);
1120	if (dfa->subexp_map != NULL)
1121	{
1122	int i;
1123	for (i = 0; i < preg->re_nsub; i++)
1124	dfa->subexp_map[i] = i;
1125	preorder (dfa->str_tree, optimize_subexps, dfa);
1126	for (i = 0; i < preg->re_nsub; i++)
1127	if (dfa->subexp_map[i] != i)
1128	break;
1129	if (i == preg->re_nsub)
1130	{
1131	free (dfa->subexp_map);
1132	dfa->subexp_map = NULL;
1133	}
1134	}
1135
1136	ret = postorder (dfa->str_tree, lower_subexps, preg);
1137	if (BE (ret != REG_NOERROR, 0))
1138	return ret;
1139	ret = postorder (dfa->str_tree, calc_first, dfa);
1140	if (BE (ret != REG_NOERROR, 0))
1141	return ret;
1142	preorder (dfa->str_tree, calc_next, dfa);
1143	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1144	if (BE (ret != REG_NOERROR, 0))
1145	return ret;
1146	ret = calc_eclosure (dfa);
1147	if (BE (ret != REG_NOERROR, 0))
1148	return ret;
1149
1150	/* We only need this during the prune_impossible_nodes pass in regexec.c;
1151	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1152	if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1153	\|\| dfa->nbackref)
1154	{
1155	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1156	if (BE (dfa->inveclosures == NULL, 0))
1157	return REG_ESPACE;
1158	ret = calc_inveclosure (dfa);
1159	}
1160
1161	return ret;
1162	}
1163
1164	/* Our parse trees are very unbalanced, so we cannot use a stack to
1165	implement parse tree visits. Instead, we use parent pointers and
1166	some hairy code in these two functions. */
1167	static reg_errcode_t
1168	postorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1169	void *extra)
1170	{
1171	bin_tree_t node, prev;
1172
1173	for (node = root; ; )
1174	{
1175	/* Descend down the tree, preferably to the left (or to the right
1176	if that's the only child). */
1177	while (node->left \|\| node->right)
1178	if (node->left)
1179	node = node->left;
1180	else
1181	node = node->right;
1182
1183	do
1184	{
1185	reg_errcode_t err = fn (extra, node);
1186	if (BE (err != REG_NOERROR, 0))
1187	return err;
1188	if (node->parent == NULL)
1189	return REG_NOERROR;
1190	prev = node;
1191	node = node->parent;
1192	}
1193	/* Go up while we have a node that is reached from the right. */
1194	while (node->right == prev \|\| node->right == NULL);
1195	node = node->right;
1196	}
1197	}
1198
1199	static reg_errcode_t
1200	preorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1201	void *extra)
1202	{
1203	bin_tree_t *node;
1204
1205	for (node = root; ; )
1206	{
1207	reg_errcode_t err = fn (extra, node);
1208	if (BE (err != REG_NOERROR, 0))
1209	return err;
1210
1211	/* Go to the left node, or up and to the right. */
1212	if (node->left)
1213	node = node->left;
1214	else
1215	{
1216	bin_tree_t *prev = NULL;
1217	while (node->right == prev \|\| node->right == NULL)
1218	{
1219	prev = node;
1220	node = node->parent;
1221	if (!node)
1222	return REG_NOERROR;
1223	}
1224	node = node->right;
1225	}
1226	}
1227	}
1228
1229	/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1230	re_search_internal to map the inner one's opr.idx to this one's. Adjust
1231	backreferences as well. Requires a preorder visit. */
1232	static reg_errcode_t
1233	optimize_subexps (void extra, bin_tree_t node)
1234	{
1235	re_dfa_t dfa = (re_dfa_t ) extra;
1236
1237	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1238	{
1239	int idx = node->token.opr.idx;
1240	node->token.opr.idx = dfa->subexp_map[idx];
1241	dfa->used_bkref_map \|= 1 << node->token.opr.idx;
1242	}
1243
1244	else if (node->token.type == SUBEXP
1245	&& node->left && node->left->token.type == SUBEXP)
1246	{
1247	int other_idx = node->left->token.opr.idx;
1248
1249	node->left = node->left->left;
1250	if (node->left)
1251	node->left->parent = node;
1252
1253	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1254	if (other_idx < BITSET_WORD_BITS)
1255	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1256	}
1257
1258	return REG_NOERROR;
1259	}
1260
1261	/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1262	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1263	static reg_errcode_t
1264	lower_subexps (void extra, bin_tree_t node)
1265	{
1266	regex_t preg = (regex_t ) extra;
1267	reg_errcode_t err = REG_NOERROR;
1268
1269	if (node->left && node->left->token.type == SUBEXP)
1270	{
1271	node->left = lower_subexp (&err, preg, node->left);
1272	if (node->left)
1273	node->left->parent = node;
1274	}
1275	if (node->right && node->right->token.type == SUBEXP)
1276	{
1277	node->right = lower_subexp (&err, preg, node->right);
1278	if (node->right)
1279	node->right->parent = node;
1280	}
1281
1282	return err;
1283	}
1284
1285	static bin_tree_t *
1286	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
1287	{
1288	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1289	bin_tree_t *body = node->left;
1290	bin_tree_t op, cls, tree1, tree;
1291
1292	if (preg->no_sub
1293	/* We do not optimize empty subexpressions, because otherwise we may
1294	have bad CONCAT nodes with NULL children. This is obviously not
1295	very common, so we do not lose much. An example that triggers
1296	this case is the sed "script" //x. */
1297	&& node->left != NULL
1298	&& (node->token.opr.idx >= BITSET_WORD_BITS
1299	\|\| !(dfa->used_bkref_map
1300	& ((bitset_word_t) 1 << node->token.opr.idx))))
1301	return node->left;
1302
1303	/* Convert the SUBEXP node to the concatenation of an
1304	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1305	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1306	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1307	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1308	tree = create_tree (dfa, op, tree1, CONCAT);
1309	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, 0))
1310	{
1311	*err = REG_ESPACE;
1312	return NULL;
1313	}
1314
1315	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1316	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1317	return tree;
1318	}
1319
1320	/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1321	nodes. Requires a postorder visit. */
1322	static reg_errcode_t
1323	calc_first (void extra, bin_tree_t node)
1324	{
1325	re_dfa_t dfa = (re_dfa_t ) extra;
1326	if (node->token.type == CONCAT)
1327	{
1328	node->first = node->left->first;
1329	node->node_idx = node->left->node_idx;
1330	}
1331	else
1332	{
1333	node->first = node;
1334	node->node_idx = re_dfa_add_node (dfa, node->token);
1335	if (BE (node->node_idx == -1, 0))
1336	return REG_ESPACE;
1337	}
1338	return REG_NOERROR;
1339	}
1340
1341	/* Pass 2: compute NEXT on the tree. Preorder visit. */
1342	static reg_errcode_t
1343	calc_next (void extra, bin_tree_t node)
1344	{
1345	switch (node->token.type)
1346	{
1347	case OP_DUP_ASTERISK:
1348	node->left->next = node;
1349	break;
1350	case CONCAT:
1351	node->left->next = node->right->first;
1352	node->right->next = node->next;
1353	break;
1354	default:
1355	if (node->left)
1356	node->left->next = node->next;
1357	if (node->right)
1358	node->right->next = node->next;
1359	break;
1360	}
1361	return REG_NOERROR;
1362	}
1363
1364	/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1365	static reg_errcode_t
1366	link_nfa_nodes (void extra, bin_tree_t node)
1367	{
1368	re_dfa_t dfa = (re_dfa_t ) extra;
1369	int idx = node->node_idx;
1370	reg_errcode_t err = REG_NOERROR;
1371
1372	switch (node->token.type)
1373	{
1374	case CONCAT:
1375	break;
1376
1377	case END_OF_RE:
1378	assert (node->next == NULL);
1379	break;
1380
1381	case OP_DUP_ASTERISK:
1382	case OP_ALT:
1383	{
1384	int left, right;
1385	dfa->has_plural_match = 1;
1386	if (node->left != NULL)
1387	left = node->left->first->node_idx;
1388	else
1389	left = node->next->node_idx;
1390	if (node->right != NULL)
1391	right = node->right->first->node_idx;
1392	else
1393	right = node->next->node_idx;
1394	assert (left > -1);
1395	assert (right > -1);
1396	err = re_node_set_init_2 (dfa->edests + idx, left, right);
1397	}
1398	break;
1399
1400	case ANCHOR:
1401	case OP_OPEN_SUBEXP:
1402	case OP_CLOSE_SUBEXP:
1403	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1404	break;
1405
1406	case OP_BACK_REF:
1407	dfa->nexts[idx] = node->next->node_idx;
1408	if (node->token.type == OP_BACK_REF)
1409	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1410	break;
1411
1412	default:
1413	assert (!IS_EPSILON_NODE (node->token.type));
1414	dfa->nexts[idx] = node->next->node_idx;
1415	break;
1416	}
1417
1418	return err;
1419	}
1420
1421	/* Duplicate the epsilon closure of the node ROOT_NODE.
1422	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1423	to their own constraint. */
1424
1425	static reg_errcode_t
1426	internal_function
1427	duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1428	int root_node, unsigned int init_constraint)
1429	{
1430	int org_node, clone_node, ret;
1431	unsigned int constraint = init_constraint;
1432	for (org_node = top_org_node, clone_node = top_clone_node;;)
1433	{
1434	int org_dest, clone_dest;
1435	if (dfa->nodes[org_node].type == OP_BACK_REF)
1436	{
1437	/* If the back reference epsilon-transit, its destination must
1438	also have the constraint. Then duplicate the epsilon closure
1439	of the destination of the back reference, and store it in
1440	edests of the back reference. */
1441	org_dest = dfa->nexts[org_node];
1442	re_node_set_empty (dfa->edests + clone_node);
1443	clone_dest = duplicate_node (dfa, org_dest, constraint);
1444	if (BE (clone_dest == -1, 0))
1445	return REG_ESPACE;
1446	dfa->nexts[clone_node] = dfa->nexts[org_node];
1447	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1448	if (BE (ret < 0, 0))
1449	return REG_ESPACE;
1450	}
1451	else if (dfa->edests[org_node].nelem == 0)
1452	{
1453	/* In case of the node can't epsilon-transit, don't duplicate the
1454	destination and store the original destination as the
1455	destination of the node. */
1456	dfa->nexts[clone_node] = dfa->nexts[org_node];
1457	break;
1458	}
1459	else if (dfa->edests[org_node].nelem == 1)
1460	{
1461	/* In case of the node can epsilon-transit, and it has only one
1462	destination. */
1463	org_dest = dfa->edests[org_node].elems[0];
1464	re_node_set_empty (dfa->edests + clone_node);
1465	if (dfa->nodes[org_node].type == ANCHOR)
1466	{
1467	/* In case of the node has another constraint, append it. */
1468	if (org_node == root_node && clone_node != org_node)
1469	{
1470	/* ...but if the node is root_node itself, it means the
1471	epsilon closure have a loop, then tie it to the
1472	destination of the root_node. */
1473	ret = re_node_set_insert (dfa->edests + clone_node,
1474	org_dest);
1475	if (BE (ret < 0, 0))
1476	return REG_ESPACE;
1477	break;
1478	}
1479	constraint \|= dfa->nodes[org_node].opr.ctx_type;
1480	}
1481	clone_dest = duplicate_node (dfa, org_dest, constraint);
1482	if (BE (clone_dest == -1, 0))
1483	return REG_ESPACE;
1484	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1485	if (BE (ret < 0, 0))
1486	return REG_ESPACE;
1487	}
1488	else /* dfa->edests[org_node].nelem == 2 */
1489	{
1490	/* In case of the node can epsilon-transit, and it has two
1491	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1492	org_dest = dfa->edests[org_node].elems[0];
1493	re_node_set_empty (dfa->edests + clone_node);
1494	/* Search for a duplicated node which satisfies the constraint. */
1495	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1496	if (clone_dest == -1)
1497	{
1498	/* There are no such a duplicated node, create a new one. */
1499	reg_errcode_t err;
1500	clone_dest = duplicate_node (dfa, org_dest, constraint);
1501	if (BE (clone_dest == -1, 0))
1502	return REG_ESPACE;
1503	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1504	if (BE (ret < 0, 0))
1505	return REG_ESPACE;
1506	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1507	root_node, constraint);
1508	if (BE (err != REG_NOERROR, 0))
1509	return err;
1510	}
1511	else
1512	{
1513	/* There are a duplicated node which satisfy the constraint,
1514	use it to avoid infinite loop. */
1515	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1516	if (BE (ret < 0, 0))
1517	return REG_ESPACE;
1518	}
1519
1520	org_dest = dfa->edests[org_node].elems[1];
1521	clone_dest = duplicate_node (dfa, org_dest, constraint);
1522	if (BE (clone_dest == -1, 0))
1523	return REG_ESPACE;
1524	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1525	if (BE (ret < 0, 0))
1526	return REG_ESPACE;
1527	}
1528	org_node = org_dest;
1529	clone_node = clone_dest;
1530	}
1531	return REG_NOERROR;
1532	}
1533
1534	/* Search for a node which is duplicated from the node ORG_NODE, and
1535	satisfies the constraint CONSTRAINT. */
1536
1537	static int
1538	search_duplicated_node (const re_dfa_t *dfa, int org_node,
1539	unsigned int constraint)
1540	{
1541	int idx;
1542	for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1543	{
1544	if (org_node == dfa->org_indices[idx]
1545	&& constraint == dfa->nodes[idx].constraint)
1546	return idx; /* Found. */
1547	}
1548	return -1; /* Not found. */
1549	}
1550
1551	/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1552	Return the index of the new node, or -1 if insufficient storage is
1553	available. */
1554
1555	static int
1556	duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1557	{
1558	int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1559	if (BE (dup_idx != -1, 1))
1560	{
1561	dfa->nodes[dup_idx].constraint = constraint;
1562	if (dfa->nodes[org_idx].type == ANCHOR)
1563	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].opr.ctx_type;
1564	dfa->nodes[dup_idx].duplicated = 1;
1565
1566	/* Store the index of the original node. */
1567	dfa->org_indices[dup_idx] = org_idx;
1568	}
1569	return dup_idx;
1570	}
1571
1572	static reg_errcode_t
1573	calc_inveclosure (re_dfa_t *dfa)
1574	{
1575	int src, idx, ret;
1576	for (idx = 0; idx < dfa->nodes_len; ++idx)
1577	re_node_set_init_empty (dfa->inveclosures + idx);
1578
1579	for (src = 0; src < dfa->nodes_len; ++src)
1580	{
1581	int *elems = dfa->eclosures[src].elems;
1582	for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1583	{
1584	ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1585	if (BE (ret == -1, 0))
1586	return REG_ESPACE;
1587	}
1588	}
1589
1590	return REG_NOERROR;
1591	}
1592
1593	/* Calculate "eclosure" for all the node in DFA. */
1594
1595	static reg_errcode_t
1596	calc_eclosure (re_dfa_t *dfa)
1597	{
1598	int node_idx, incomplete;
1599	#ifdef DEBUG
1600	assert (dfa->nodes_len > 0);
1601	#endif
1602	incomplete = 0;
1603	/* For each nodes, calculate epsilon closure. */
1604	for (node_idx = 0; ; ++node_idx)
1605	{
1606	reg_errcode_t err;
1607	re_node_set eclosure_elem;
1608	if (node_idx == dfa->nodes_len)
1609	{
1610	if (!incomplete)
1611	break;
1612	incomplete = 0;
1613	node_idx = 0;
1614	}
1615
1616	#ifdef DEBUG
1617	assert (dfa->eclosures[node_idx].nelem != -1);
1618	#endif
1619
1620	/* If we have already calculated, skip it. */
1621	if (dfa->eclosures[node_idx].nelem != 0)
1622	continue;
1623	/* Calculate epsilon closure of `node_idx'. */
1624	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1625	if (BE (err != REG_NOERROR, 0))
1626	return err;
1627
1628	if (dfa->eclosures[node_idx].nelem == 0)
1629	{
1630	incomplete = 1;
1631	re_node_set_free (&eclosure_elem);
1632	}
1633	}
1634	return REG_NOERROR;
1635	}
1636
1637	/* Calculate epsilon closure of NODE. */
1638
1639	static reg_errcode_t
1640	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, int node, int root)
1641	{
1642	reg_errcode_t err;
1643	unsigned int constraint;
1644	int i, incomplete;
1645	re_node_set eclosure;
1646	incomplete = 0;
1647	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1648	if (BE (err != REG_NOERROR, 0))
1649	return err;
1650
1651	/* This indicates that we are calculating this node now.
1652	We reference this value to avoid infinite loop. */
1653	dfa->eclosures[node].nelem = -1;
1654
1655	constraint = ((dfa->nodes[node].type == ANCHOR)
1656	? dfa->nodes[node].opr.ctx_type : 0);
1657	/* If the current node has constraints, duplicate all nodes.
1658	Since they must inherit the constraints. */
1659	if (constraint
1660	&& dfa->edests[node].nelem
1661	&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1662	{
1663	int org_node, cur_node;
1664	org_node = cur_node = node;
1665	err = duplicate_node_closure (dfa, node, node, node, constraint);
1666	if (BE (err != REG_NOERROR, 0))
1667	return err;
1668	}
1669
1670	/* Expand each epsilon destination nodes. */
1671	if (IS_EPSILON_NODE(dfa->nodes[node].type))
1672	for (i = 0; i < dfa->edests[node].nelem; ++i)
1673	{
1674	re_node_set eclosure_elem;
1675	int edest = dfa->edests[node].elems[i];
1676	/* If calculating the epsilon closure of `edest' is in progress,
1677	return intermediate result. */
1678	if (dfa->eclosures[edest].nelem == -1)
1679	{
1680	incomplete = 1;
1681	continue;
1682	}
1683	/* If we haven't calculated the epsilon closure of `edest' yet,
1684	calculate now. Otherwise use calculated epsilon closure. */
1685	if (dfa->eclosures[edest].nelem == 0)
1686	{
1687	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1688	if (BE (err != REG_NOERROR, 0))
1689	return err;
1690	}
1691	else
1692	eclosure_elem = dfa->eclosures[edest];
1693	/* Merge the epsilon closure of `edest'. */
1694	re_node_set_merge (&eclosure, &eclosure_elem);
1695	/* If the epsilon closure of `edest' is incomplete,
1696	the epsilon closure of this node is also incomplete. */
1697	if (dfa->eclosures[edest].nelem == 0)
1698	{
1699	incomplete = 1;
1700	re_node_set_free (&eclosure_elem);
1701	}
1702	}
1703
1704	/* Epsilon closures include itself. */
1705	re_node_set_insert (&eclosure, node);
1706	if (incomplete && !root)
1707	dfa->eclosures[node].nelem = 0;
1708	else
1709	dfa->eclosures[node] = eclosure;
1710	*new_set = eclosure;
1711	return REG_NOERROR;
1712	}
1713
1714
1715	/* Functions for token which are used in the parser. */
1716
1717	/* Fetch a token from INPUT.
1718	We must not use this function inside bracket expressions. */
1719
1720	static void
1721	internal_function
1722	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
1723	{
1724	re_string_skip_bytes (input, peek_token (result, input, syntax));
1725	}
1726
1727	/* Peek a token from INPUT, and return the length of the token.
1728	We must not use this function inside bracket expressions. */
1729
1730	static int
1731	internal_function
1732	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
1733	{
1734	unsigned char c;
1735
1736	if (re_string_eoi (input))
1737	{
1738	token->type = END_OF_RE;
1739	return 0;
1740	}
1741
1742	c = re_string_peek_byte (input, 0);
1743	token->opr.c = c;
1744
1745	token->word_char = 0;
1746	#ifdef RE_ENABLE_I18N
1747	token->mb_partial = 0;
1748	if (input->mb_cur_max > 1 &&
1749	!re_string_first_byte (input, re_string_cur_idx (input)))
1750	{
1751	token->type = CHARACTER;
1752	token->mb_partial = 1;
1753	return 1;
1754	}
1755	#endif
1756	if (c == '\\')
1757	{
1758	unsigned char c2;
1759	if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1760	{
1761	token->type = BACK_SLASH;
1762	return 1;
1763	}
1764
1765	c2 = re_string_peek_byte_case (input, 1);
1766	token->opr.c = c2;
1767	token->type = CHARACTER;
1768	#ifdef RE_ENABLE_I18N
1769	if (input->mb_cur_max > 1)
1770	{
1771	wint_t wc = re_string_wchar_at (input,
1772	re_string_cur_idx (input) + 1);
1773	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1774	}
1775	else
1776	#endif
1777	token->word_char = IS_WORD_CHAR (c2) != 0;
1778
1779	switch (c2)
1780	{
1781	case '\|':
1782	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1783	token->type = OP_ALT;
1784	break;
1785	case '1': case '2': case '3': case '4': case '5':
1786	case '6': case '7': case '8': case '9':
1787	if (!(syntax & RE_NO_BK_REFS))
1788	{
1789	token->type = OP_BACK_REF;
1790	token->opr.idx = c2 - '1';
1791	}
1792	break;
1793	case '<':
1794	if (!(syntax & RE_NO_GNU_OPS))
1795	{
1796	token->type = ANCHOR;
1797	token->opr.ctx_type = WORD_FIRST;
1798	}
1799	break;
1800	case '>':
1801	if (!(syntax & RE_NO_GNU_OPS))
1802	{
1803	token->type = ANCHOR;
1804	token->opr.ctx_type = WORD_LAST;
1805	}
1806	break;
1807	case 'b':
1808	if (!(syntax & RE_NO_GNU_OPS))
1809	{
1810	token->type = ANCHOR;
1811	token->opr.ctx_type = WORD_DELIM;
1812	}
1813	break;
1814	case 'B':
1815	if (!(syntax & RE_NO_GNU_OPS))
1816	{
1817	token->type = ANCHOR;
1818	token->opr.ctx_type = NOT_WORD_DELIM;
1819	}
1820	break;
1821	case 'w':
1822	if (!(syntax & RE_NO_GNU_OPS))
1823	token->type = OP_WORD;
1824	break;
1825	case 'W':
1826	if (!(syntax & RE_NO_GNU_OPS))
1827	token->type = OP_NOTWORD;
1828	break;
1829	case 's':
1830	if (!(syntax & RE_NO_GNU_OPS))
1831	token->type = OP_SPACE;
1832	break;
1833	case 'S':
1834	if (!(syntax & RE_NO_GNU_OPS))
1835	token->type = OP_NOTSPACE;
1836	break;
1837	case '`':
1838	if (!(syntax & RE_NO_GNU_OPS))
1839	{
1840	token->type = ANCHOR;
1841	token->opr.ctx_type = BUF_FIRST;
1842	}
1843	break;
1844	case '\'':
1845	if (!(syntax & RE_NO_GNU_OPS))
1846	{
1847	token->type = ANCHOR;
1848	token->opr.ctx_type = BUF_LAST;
1849	}
1850	break;
1851	case '(':
1852	if (!(syntax & RE_NO_BK_PARENS))
1853	token->type = OP_OPEN_SUBEXP;
1854	break;
1855	case ')':
1856	if (!(syntax & RE_NO_BK_PARENS))
1857	token->type = OP_CLOSE_SUBEXP;
1858	break;
1859	case '+':
1860	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1861	token->type = OP_DUP_PLUS;
1862	break;
1863	case '?':
1864	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1865	token->type = OP_DUP_QUESTION;
1866	break;
1867	case '{':
1868	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1869	token->type = OP_OPEN_DUP_NUM;
1870	break;
1871	case '}':
1872	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1873	token->type = OP_CLOSE_DUP_NUM;
1874	break;
1875	default:
1876	break;
1877	}
1878	return 2;
1879	}
1880
1881	token->type = CHARACTER;
1882	#ifdef RE_ENABLE_I18N
1883	if (input->mb_cur_max > 1)
1884	{
1885	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1886	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1887	}
1888	else
1889	#endif
1890	token->word_char = IS_WORD_CHAR (token->opr.c);
1891
1892	switch (c)
1893	{
1894	case '\n':
1895	if (syntax & RE_NEWLINE_ALT)
1896	token->type = OP_ALT;
1897	break;
1898	case '\|':
1899	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1900	token->type = OP_ALT;
1901	break;
1902	case '*':
1903	token->type = OP_DUP_ASTERISK;
1904	break;
1905	case '+':
1906	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1907	token->type = OP_DUP_PLUS;
1908	break;
1909	case '?':
1910	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1911	token->type = OP_DUP_QUESTION;
1912	break;
1913	case '{':
1914	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1915	token->type = OP_OPEN_DUP_NUM;
1916	break;
1917	case '}':
1918	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1919	token->type = OP_CLOSE_DUP_NUM;
1920	break;
1921	case '(':
1922	if (syntax & RE_NO_BK_PARENS)
1923	token->type = OP_OPEN_SUBEXP;
1924	break;
1925	case ')':
1926	if (syntax & RE_NO_BK_PARENS)
1927	token->type = OP_CLOSE_SUBEXP;
1928	break;
1929	case '[':
1930	token->type = OP_OPEN_BRACKET;
1931	break;
1932	case '.':
1933	token->type = OP_PERIOD;
1934	break;
1935	case '^':
1936	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
1937	re_string_cur_idx (input) != 0)
1938	{
1939	char prev = re_string_peek_byte (input, -1);
1940	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != '\n')
1941	break;
1942	}
1943	token->type = ANCHOR;
1944	token->opr.ctx_type = LINE_FIRST;
1945	break;
1946	case '$':
1947	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1948	re_string_cur_idx (input) + 1 != re_string_length (input))
1949	{
1950	re_token_t next;
1951	re_string_skip_bytes (input, 1);
1952	peek_token (&next, input, syntax);
1953	re_string_skip_bytes (input, -1);
1954	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1955	break;
1956	}
1957	token->type = ANCHOR;
1958	token->opr.ctx_type = LINE_LAST;
1959	break;
1960	default:
1961	break;
1962	}
1963	return 1;
1964	}
1965
1966	/* Peek a token from INPUT, and return the length of the token.
1967	We must not use this function out of bracket expressions. */
1968
1969	static int
1970	internal_function
1971	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
1972	{
1973	unsigned char c;
1974	if (re_string_eoi (input))
1975	{
1976	token->type = END_OF_RE;
1977	return 0;
1978	}
1979	c = re_string_peek_byte (input, 0);
1980	token->opr.c = c;
1981
1982	#ifdef RE_ENABLE_I18N
1983	if (input->mb_cur_max > 1 &&
1984	!re_string_first_byte (input, re_string_cur_idx (input)))
1985	{
1986	token->type = CHARACTER;
1987	return 1;
1988	}
1989	#endif /* RE_ENABLE_I18N */
1990
1991	if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
1992	&& re_string_cur_idx (input) + 1 < re_string_length (input))
1993	{
1994	/* In this case, '\' escape a character. */
1995	unsigned char c2;
1996	re_string_skip_bytes (input, 1);
1997	c2 = re_string_peek_byte (input, 0);
1998	token->opr.c = c2;
1999	token->type = CHARACTER;
2000	return 1;
2001	}
2002	if (c == '[') /* '[' is a special char in a bracket exps. */
2003	{
2004	unsigned char c2;
2005	int token_len;
2006	if (re_string_cur_idx (input) + 1 < re_string_length (input))
2007	c2 = re_string_peek_byte (input, 1);
2008	else
2009	c2 = 0;
2010	token->opr.c = c2;
2011	token_len = 2;
2012	switch (c2)
2013	{
2014	case '.':
2015	token->type = OP_OPEN_COLL_ELEM;
2016	break;
2017	case '=':
2018	token->type = OP_OPEN_EQUIV_CLASS;
2019	break;
2020	case ':':
2021	if (syntax & RE_CHAR_CLASSES)
2022	{
2023	token->type = OP_OPEN_CHAR_CLASS;
2024	break;
2025	}
2026	/* else fall through. */
2027	default:
2028	token->type = CHARACTER;
2029	token->opr.c = c;
2030	token_len = 1;
2031	break;
2032	}
2033	return token_len;
2034	}
2035	switch (c)
2036	{
2037	case '-':
2038	token->type = OP_CHARSET_RANGE;
2039	break;
2040	case ']':
2041	token->type = OP_CLOSE_BRACKET;
2042	break;
2043	case '^':
2044	token->type = OP_NON_MATCH_LIST;
2045	break;
2046	default:
2047	token->type = CHARACTER;
2048	}
2049	return 1;
2050	}
2051
2052
2053	/* Functions for parser. */
2054
2055	/* Entry point of the parser.
2056	Parse the regular expression REGEXP and return the structure tree.
2057	If an error is occured, ERR is set by error code, and return NULL.
2058	This function build the following tree, from regular expression <reg_exp>:
2059	CAT
2060	/ \
2061	/ \
2062	<reg_exp> EOR
2063
2064	CAT means concatenation.
2065	EOR means end of regular expression. */
2066
2067	static bin_tree_t *
2068	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
2069	reg_errcode_t *err)
2070	{
2071	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2072	bin_tree_t tree, eor, *root;
2073	re_token_t current_token;
2074	dfa->syntax = syntax;
2075	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2076	tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2077	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2078	return NULL;
2079	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2080	if (tree != NULL)
2081	root = create_tree (dfa, tree, eor, CONCAT);
2082	else
2083	root = eor;
2084	if (BE (eor == NULL \|\| root == NULL, 0))
2085	{
2086	*err = REG_ESPACE;
2087	return NULL;
2088	}
2089	return root;
2090	}
2091
2092	/* This function build the following tree, from regular expression
2093	<branch1>\|<branch2>:
2094	ALT
2095	/ \
2096	/ \
2097	<branch1> <branch2>
2098
2099	ALT means alternative, which represents the operator `\|'. */
2100
2101	static bin_tree_t *
2102	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2103	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2104	{
2105	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2106	bin_tree_t tree, branch = NULL;
2107	tree = parse_branch (regexp, preg, token, syntax, nest, err);
2108	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2109	return NULL;
2110
2111	while (token->type == OP_ALT)
2112	{
2113	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2114	if (token->type != OP_ALT && token->type != END_OF_RE
2115	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2116	{
2117	branch = parse_branch (regexp, preg, token, syntax, nest, err);
2118	if (BE (*err != REG_NOERROR && branch == NULL, 0))
2119	return NULL;
2120	}
2121	else
2122	branch = NULL;
2123	tree = create_tree (dfa, tree, branch, OP_ALT);
2124	if (BE (tree == NULL, 0))
2125	{
2126	*err = REG_ESPACE;
2127	return NULL;
2128	}
2129	}
2130	return tree;
2131	}
2132
2133	/* This function build the following tree, from regular expression
2134	<exp1><exp2>:
2135	CAT
2136	/ \
2137	/ \
2138	<exp1> <exp2>
2139
2140	CAT means concatenation. */
2141
2142	static bin_tree_t *
2143	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
2144	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2145	{
2146	bin_tree_t tree, exp;
2147	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2148	tree = parse_expression (regexp, preg, token, syntax, nest, err);
2149	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2150	return NULL;
2151
2152	while (token->type != OP_ALT && token->type != END_OF_RE
2153	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2154	{
2155	exp = parse_expression (regexp, preg, token, syntax, nest, err);
2156	if (BE (*err != REG_NOERROR && exp == NULL, 0))
2157	{
2158	return NULL;
2159	}
2160	if (tree != NULL && exp != NULL)
2161	{
2162	tree = create_tree (dfa, tree, exp, CONCAT);
2163	if (tree == NULL)
2164	{
2165	*err = REG_ESPACE;
2166	return NULL;
2167	}
2168	}
2169	else if (tree == NULL)
2170	tree = exp;
2171	/* Otherwise exp == NULL, we don't need to create new tree. */
2172	}
2173	return tree;
2174	}
2175
2176	/* This function build the following tree, from regular expression a*:
2177	*
2178	\|
2179	a
2180	*/
2181
2182	static bin_tree_t *
2183	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
2184	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2185	{
2186	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2187	bin_tree_t *tree;
2188	switch (token->type)
2189	{
2190	case CHARACTER:
2191	tree = create_token_tree (dfa, NULL, NULL, token);
2192	if (BE (tree == NULL, 0))
2193	{
2194	*err = REG_ESPACE;
2195	return NULL;
2196	}
2197	#ifdef RE_ENABLE_I18N
2198	if (dfa->mb_cur_max > 1)
2199	{
2200	while (!re_string_eoi (regexp)
2201	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2202	{
2203	bin_tree_t *mbc_remain;
2204	fetch_token (token, regexp, syntax);
2205	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2206	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2207	if (BE (mbc_remain == NULL \|\| tree == NULL, 0))
2208	{
2209	*err = REG_ESPACE;
2210	return NULL;
2211	}
2212	}
2213	}
2214	#endif
2215	break;
2216	case OP_OPEN_SUBEXP:
2217	tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2218	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2219	return NULL;
2220	break;
2221	case OP_OPEN_BRACKET:
2222	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2223	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2224	return NULL;
2225	break;
2226	case OP_BACK_REF:
2227	if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2228	{
2229	*err = REG_ESUBREG;
2230	return NULL;
2231	}
2232	dfa->used_bkref_map \|= 1 << token->opr.idx;
2233	tree = create_token_tree (dfa, NULL, NULL, token);
2234	if (BE (tree == NULL, 0))
2235	{
2236	*err = REG_ESPACE;
2237	return NULL;
2238	}
2239	++dfa->nbackref;
2240	dfa->has_mb_node = 1;
2241	break;
2242	case OP_OPEN_DUP_NUM:
2243	if (syntax & RE_CONTEXT_INVALID_DUP)
2244	{
2245	*err = REG_BADRPT;
2246	return NULL;
2247	}
2248	/* FALLTHROUGH */
2249	case OP_DUP_ASTERISK:
2250	case OP_DUP_PLUS:
2251	case OP_DUP_QUESTION:
2252	if (syntax & RE_CONTEXT_INVALID_OPS)
2253	{
2254	*err = REG_BADRPT;
2255	return NULL;
2256	}
2257	else if (syntax & RE_CONTEXT_INDEP_OPS)
2258	{
2259	fetch_token (token, regexp, syntax);
2260	return parse_expression (regexp, preg, token, syntax, nest, err);
2261	}
2262	/* else fall through */
2263	case OP_CLOSE_SUBEXP:
2264	if ((token->type == OP_CLOSE_SUBEXP) &&
2265	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2266	{
2267	*err = REG_ERPAREN;
2268	return NULL;
2269	}
2270	/* else fall through */
2271	case OP_CLOSE_DUP_NUM:
2272	/* We treat it as a normal character. */
2273
2274	/* Then we can these characters as normal characters. */
2275	token->type = CHARACTER;
2276	/* mb_partial and word_char bits should be initialized already
2277	by peek_token. */
2278	tree = create_token_tree (dfa, NULL, NULL, token);
2279	if (BE (tree == NULL, 0))
2280	{
2281	*err = REG_ESPACE;
2282	return NULL;
2283	}
2284	break;
2285	case ANCHOR:
2286	if ((token->opr.ctx_type
2287	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2288	&& dfa->word_ops_used == 0)
2289	init_word_char (dfa);
2290	if (token->opr.ctx_type == WORD_DELIM
2291	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
2292	{
2293	bin_tree_t tree_first, tree_last;
2294	if (token->opr.ctx_type == WORD_DELIM)
2295	{
2296	token->opr.ctx_type = WORD_FIRST;
2297	tree_first = create_token_tree (dfa, NULL, NULL, token);
2298	token->opr.ctx_type = WORD_LAST;
2299	}
2300	else
2301	{
2302	token->opr.ctx_type = INSIDE_WORD;
2303	tree_first = create_token_tree (dfa, NULL, NULL, token);
2304	token->opr.ctx_type = INSIDE_NOTWORD;
2305	}
2306	tree_last = create_token_tree (dfa, NULL, NULL, token);
2307	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2308	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, 0))
2309	{
2310	*err = REG_ESPACE;
2311	return NULL;
2312	}
2313	}
2314	else
2315	{
2316	tree = create_token_tree (dfa, NULL, NULL, token);
2317	if (BE (tree == NULL, 0))
2318	{
2319	*err = REG_ESPACE;
2320	return NULL;
2321	}
2322	}
2323	/* We must return here, since ANCHORs can't be followed
2324	by repetition operators.
2325	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
2326	it must not be "<ANCHOR(^)><REPEAT()>". /
2327	fetch_token (token, regexp, syntax);
2328	return tree;
2329	case OP_PERIOD:
2330	tree = create_token_tree (dfa, NULL, NULL, token);
2331	if (BE (tree == NULL, 0))
2332	{
2333	*err = REG_ESPACE;
2334	return NULL;
2335	}
2336	if (dfa->mb_cur_max > 1)
2337	dfa->has_mb_node = 1;
2338	break;
2339	case OP_WORD:
2340	case OP_NOTWORD:
2341	tree = build_charclass_op (dfa, regexp->trans,
2342	(const unsigned char *) "alnum",
2343	(const unsigned char *) "_",
2344	token->type == OP_NOTWORD, err);
2345	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2346	return NULL;
2347	break;
2348	case OP_SPACE:
2349	case OP_NOTSPACE:
2350	tree = build_charclass_op (dfa, regexp->trans,
2351	(const unsigned char *) "space",
2352	(const unsigned char *) "",
2353	token->type == OP_NOTSPACE, err);
2354	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2355	return NULL;
2356	break;
2357	case OP_ALT:
2358	case END_OF_RE:
2359	return NULL;
2360	case BACK_SLASH:
2361	*err = REG_EESCAPE;
2362	return NULL;
2363	default:
2364	/* Must not happen? */
2365	#ifdef DEBUG
2366	assert (0);
2367	#endif
2368	return NULL;
2369	}
2370	fetch_token (token, regexp, syntax);
2371
2372	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
2373	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
2374	{
2375	tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2376	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2377	return NULL;
2378	/* In BRE consecutive duplications are not allowed. */
2379	if ((syntax & RE_CONTEXT_INVALID_DUP)
2380	&& (token->type == OP_DUP_ASTERISK
2381	\|\| token->type == OP_OPEN_DUP_NUM))
2382	{
2383	*err = REG_BADRPT;
2384	return NULL;
2385	}
2386	}
2387
2388	return tree;
2389	}
2390
2391	/* This function build the following tree, from regular expression
2392	(<reg_exp>):
2393	SUBEXP
2394	\|
2395	<reg_exp>
2396	*/
2397
2398	static bin_tree_t *
2399	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2400	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2401	{
2402	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2403	bin_tree_t *tree;
2404	size_t cur_nsub;
2405	cur_nsub = preg->re_nsub++;
2406
2407	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2408
2409	/* The subexpression may be a null string. */
2410	if (token->type == OP_CLOSE_SUBEXP)
2411	tree = NULL;
2412	else
2413	{
2414	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2415	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2416	*err = REG_EPAREN;
2417	if (BE (*err != REG_NOERROR, 0))
2418	return NULL;
2419	}
2420
2421	if (cur_nsub <= '9' - '1')
2422	dfa->completed_bkref_map \|= 1 << cur_nsub;
2423
2424	tree = create_tree (dfa, tree, NULL, SUBEXP);
2425	if (BE (tree == NULL, 0))
2426	{
2427	*err = REG_ESPACE;
2428	return NULL;
2429	}
2430	tree->token.opr.idx = cur_nsub;
2431	return tree;
2432	}
2433
2434	/* This function parse repetition operators like "", "+", "{1,3}" etc. /
2435
2436	static bin_tree_t *
2437	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
2438	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
2439	{
2440	bin_tree_t tree = NULL, old_tree = NULL;
2441	int i, start, end, start_idx = re_string_cur_idx (regexp);
2442	re_token_t start_token = *token;
2443
2444	if (token->type == OP_OPEN_DUP_NUM)
2445	{
2446	end = 0;
2447	start = fetch_number (regexp, token, syntax);
2448	if (start == -1)
2449	{
2450	if (token->type == CHARACTER && token->opr.c == ',')
2451	start = 0; /* We treat "{,m}" as "{0,m}". */
2452	else
2453	{
2454	err = REG_BADBR; / <re>{} is invalid. */
2455	return NULL;
2456	}
2457	}
2458	if (BE (start != -2, 1))
2459	{
2460	/* We treat "{n}" as "{n,n}". */
2461	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2462	: ((token->type == CHARACTER && token->opr.c == ',')
2463	? fetch_number (regexp, token, syntax) : -2));
2464	}
2465	if (BE (start == -2 \|\| end == -2, 0))
2466	{
2467	/* Invalid sequence. */
2468	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2469	{
2470	if (token->type == END_OF_RE)
2471	*err = REG_EBRACE;
2472	else
2473	*err = REG_BADBR;
2474
2475	return NULL;
2476	}
2477
2478	/* If the syntax bit is set, rollback. */
2479	re_string_set_index (regexp, start_idx);
2480	*token = start_token;
2481	token->type = CHARACTER;
2482	/* mb_partial and word_char bits should be already initialized by
2483	peek_token. */
2484	return elem;
2485	}
2486
2487	if (BE (end != -1 && start > end, 0))
2488	{
2489	/* First number greater than second. */
2490	*err = REG_BADBR;
2491	return NULL;
2492	}
2493	}
2494	else
2495	{
2496	start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2497	end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2498	}
2499
2500	fetch_token (token, regexp, syntax);
2501
2502	if (BE (elem == NULL, 0))
2503	return NULL;
2504	if (BE (start == 0 && end == 0, 0))
2505	{
2506	postorder (elem, free_tree, NULL);
2507	return NULL;
2508	}
2509
2510	/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2511	if (BE (start > 0, 0))
2512	{
2513	tree = elem;
2514	for (i = 2; i <= start; ++i)
2515	{
2516	elem = duplicate_tree (elem, dfa);
2517	tree = create_tree (dfa, tree, elem, CONCAT);
2518	if (BE (elem == NULL \|\| tree == NULL, 0))
2519	goto parse_dup_op_espace;
2520	}
2521
2522	if (start == end)
2523	return tree;
2524
2525	/* Duplicate ELEM before it is marked optional. */
2526	elem = duplicate_tree (elem, dfa);
2527	old_tree = tree;
2528	}
2529	else
2530	old_tree = NULL;
2531
2532	if (elem->token.type == SUBEXP)
2533	postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2534
2535	tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2536	if (BE (tree == NULL, 0))
2537	goto parse_dup_op_espace;
2538
2539	/* This loop is actually executed only when end != -1,
2540	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2541	already created the start+1-th copy. */
2542	for (i = start + 2; i <= end; ++i)
2543	{
2544	elem = duplicate_tree (elem, dfa);
2545	tree = create_tree (dfa, tree, elem, CONCAT);
2546	if (BE (elem == NULL \|\| tree == NULL, 0))
2547	goto parse_dup_op_espace;
2548
2549	tree = create_tree (dfa, tree, NULL, OP_ALT);
2550	if (BE (tree == NULL, 0))
2551	goto parse_dup_op_espace;
2552	}
2553
2554	if (old_tree)
2555	tree = create_tree (dfa, old_tree, tree, CONCAT);
2556
2557	return tree;
2558
2559	parse_dup_op_espace:
2560	*err = REG_ESPACE;
2561	return NULL;
2562	}
2563
2564	/* Size of the names for collating symbol/equivalence_class/character_class.
2565	I'm not sure, but maybe enough. */
2566	#define BRACKET_NAME_BUF_SIZE 32
2567
2568	#ifndef _LIBC
2569	/* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2570	Build the range expression which starts from START_ELEM, and ends
2571	at END_ELEM. The result are written to MBCSET and SBCSET.
2572	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2573	mbcset->range_ends, is a pointer argument sinse we may
2574	update it. */
2575
2576	static reg_errcode_t
2577	internal_function
2578	# ifdef RE_ENABLE_I18N
2579	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int range_alloc,
2580	bracket_elem_t start_elem, bracket_elem_t end_elem)
2581	# else /* not RE_ENABLE_I18N */
2582	build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2583	bracket_elem_t *end_elem)
2584	# endif /* not RE_ENABLE_I18N */
2585	{
2586	unsigned int start_ch, end_ch;
2587	/* Equivalence Classes and Character Classes can't be a range start/end. */
2588	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2589	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2590	0))
2591	return REG_ERANGE;
2592
2593	/* We can handle no multi character collating elements without libc
2594	support. */
2595	if (BE ((start_elem->type == COLL_SYM
2596	&& strlen ((char *) start_elem->opr.name) > 1)
2597	\|\| (end_elem->type == COLL_SYM
2598	&& strlen ((char *) end_elem->opr.name) > 1), 0))
2599	return REG_ECOLLATE;
2600
2601	# ifdef RE_ENABLE_I18N
2602	{
2603	wchar_t wc;
2604	wint_t start_wc;
2605	wint_t end_wc;
2606	wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2607
2608	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2609	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2610	: 0));
2611	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2612	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2613	: 0));
2614	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
2615	? __btowc (start_ch) : start_elem->opr.wch);
2616	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
2617	? __btowc (end_ch) : end_elem->opr.wch);
2618	if (start_wc == WEOF \|\| end_wc == WEOF)
2619	return REG_ECOLLATE;
2620	cmp_buf[0] = start_wc;
2621	cmp_buf[4] = end_wc;
2622	if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2623	return REG_ERANGE;
2624
2625	/* Got valid collation sequence values, add them as a new entry.
2626	However, for !_LIBC we have no collation elements: if the
2627	character set is single byte, the single byte character set
2628	that we build below suffices. parse_bracket_exp passes
2629	no MBCSET if dfa->mb_cur_max == 1. */
2630	if (mbcset)
2631	{
2632	/* Check the space of the arrays. */
2633	if (BE (*range_alloc == mbcset->nranges, 0))
2634	{
2635	/* There is not enough space, need realloc. */
2636	wchar_t new_array_start, new_array_end;
2637	int new_nranges;
2638
2639	/* +1 in case of mbcset->nranges is 0. */
2640	new_nranges = 2 * mbcset->nranges + 1;
2641	/* Use realloc since mbcset->range_starts and mbcset->range_ends
2642	are NULL if range_alloc == 0. /
2643	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2644	new_nranges);
2645	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2646	new_nranges);
2647
2648	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2649	return REG_ESPACE;
2650
2651	mbcset->range_starts = new_array_start;
2652	mbcset->range_ends = new_array_end;
2653	*range_alloc = new_nranges;
2654	}
2655
2656	mbcset->range_starts[mbcset->nranges] = start_wc;
2657	mbcset->range_ends[mbcset->nranges++] = end_wc;
2658	}
2659
2660	/* Build the table for single byte characters. */
2661	for (wc = 0; wc < SBC_MAX; ++wc)
2662	{
2663	cmp_buf[2] = wc;
2664	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2665	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2666	bitset_set (sbcset, wc);
2667	}
2668	}
2669	# else /* not RE_ENABLE_I18N */
2670	{
2671	unsigned int ch;
2672	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2673	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2674	: 0));
2675	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2676	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2677	: 0));
2678	if (start_ch > end_ch)
2679	return REG_ERANGE;
2680	/* Build the table for single byte characters. */
2681	for (ch = 0; ch < SBC_MAX; ++ch)
2682	if (start_ch <= ch && ch <= end_ch)
2683	bitset_set (sbcset, ch);
2684	}
2685	# endif /* not RE_ENABLE_I18N */
2686	return REG_NOERROR;
2687	}
2688	#endif /* not _LIBC */
2689
2690	#ifndef _LIBC
2691	/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2692	Build the collating element which is represented by NAME.
2693	The result are written to MBCSET and SBCSET.
2694	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2695	pointer argument since we may update it. */
2696
2697	static reg_errcode_t
2698	internal_function
2699	# ifdef RE_ENABLE_I18N
2700	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2701	int coll_sym_alloc, const unsigned char name)
2702	# else /* not RE_ENABLE_I18N */
2703	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2704	# endif /* not RE_ENABLE_I18N */
2705	{
2706	size_t name_len = strlen ((const char *) name);
2707	if (BE (name_len != 1, 0))
2708	return REG_ECOLLATE;
2709	else
2710	{
2711	bitset_set (sbcset, name[0]);
2712	return REG_NOERROR;
2713	}
2714	}
2715	#endif /* not _LIBC */
2716
2717	/* This function parse bracket expression like "[abc]", "[a-c]",
2718	"[[.a-a.]]" etc. */
2719
2720	static bin_tree_t *
2721	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
2722	reg_syntax_t syntax, reg_errcode_t *err)
2723	{
2724	#ifdef _LIBC
2725	const unsigned char *collseqmb;
2726	const char *collseqwc;
2727	uint32_t nrules;
2728	int32_t table_size;
2729	const int32_t *symb_table;
2730	const unsigned char *extra;
2731
2732	/* Local function for parse_bracket_exp used in _LIBC environement.
2733	Seek the collating symbol entry correspondings to NAME.
2734	Return the index of the symbol in the SYMB_TABLE. */
2735
2736	auto inline int32_t
2737	__attribute ((always_inline))
2738	seek_collating_symbol_entry (name, name_len)
2739	const unsigned char *name;
2740	size_t name_len;
2741	{
2742	int32_t hash = elem_hash ((const char *) name, name_len);
2743	int32_t elem = hash % table_size;
2744	if (symb_table[2 * elem] != 0)
2745	{
2746	int32_t second = hash % (table_size - 2) + 1;
2747
2748	do
2749	{
2750	/* First compare the hashing value. */
2751	if (symb_table[2 * elem] == hash
2752	/* Compare the length of the name. */
2753	&& name_len == extra[symb_table[2 * elem + 1]]
2754	/* Compare the name. */
2755	&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2756	name_len) == 0)
2757	{
2758	/* Yep, this is the entry. */
2759	break;
2760	}
2761
2762	/* Next entry. */
2763	elem += second;
2764	}
2765	while (symb_table[2 * elem] != 0);
2766	}
2767	return elem;
2768	}
2769
2770	/* Local function for parse_bracket_exp used in _LIBC environement.
2771	Look up the collation sequence value of BR_ELEM.
2772	Return the value if succeeded, UINT_MAX otherwise. */
2773
2774	auto inline unsigned int
2775	__attribute ((always_inline))
2776	lookup_collation_sequence_value (br_elem)
2777	bracket_elem_t *br_elem;
2778	{
2779	if (br_elem->type == SB_CHAR)
2780	{
2781	/*
2782	if (MB_CUR_MAX == 1)
2783	*/
2784	if (nrules == 0)
2785	return collseqmb[br_elem->opr.ch];
2786	else
2787	{
2788	wint_t wc = __btowc (br_elem->opr.ch);
2789	return __collseq_table_lookup (collseqwc, wc);
2790	}
2791	}
2792	else if (br_elem->type == MB_CHAR)
2793	{
2794	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2795	}
2796	else if (br_elem->type == COLL_SYM)
2797	{
2798	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2799	if (nrules != 0)
2800	{
2801	int32_t elem, idx;
2802	elem = seek_collating_symbol_entry (br_elem->opr.name,
2803	sym_name_len);
2804	if (symb_table[2 * elem] != 0)
2805	{
2806	/* We found the entry. */
2807	idx = symb_table[2 * elem + 1];
2808	/* Skip the name of collating element name. */
2809	idx += 1 + extra[idx];
2810	/* Skip the byte sequence of the collating element. */
2811	idx += 1 + extra[idx];
2812	/* Adjust for the alignment. */
2813	idx = (idx + 3) & ~3;
2814	/* Skip the multibyte collation sequence value. */
2815	idx += sizeof (unsigned int);
2816	/* Skip the wide char sequence of the collating element. */
2817	idx += sizeof (unsigned int) *
2818	(1 + (unsigned int ) (extra + idx));
2819	/* Return the collation sequence value. */
2820	return (unsigned int ) (extra + idx);
2821	}
2822	else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2823	{
2824	/* No valid character. Match it as a single byte
2825	character. */
2826	return collseqmb[br_elem->opr.name[0]];
2827	}
2828	}
2829	else if (sym_name_len == 1)
2830	return collseqmb[br_elem->opr.name[0]];
2831	}
2832	return UINT_MAX;
2833	}
2834
2835	/* Local function for parse_bracket_exp used in _LIBC environement.
2836	Build the range expression which starts from START_ELEM, and ends
2837	at END_ELEM. The result are written to MBCSET and SBCSET.
2838	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2839	mbcset->range_ends, is a pointer argument sinse we may
2840	update it. */
2841
2842	auto inline reg_errcode_t
2843	__attribute ((always_inline))
2844	build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2845	re_charset_t *mbcset;
2846	int *range_alloc;
2847	bitset_t sbcset;
2848	bracket_elem_t start_elem, end_elem;
2849	{
2850	unsigned int ch;
2851	uint32_t start_collseq;
2852	uint32_t end_collseq;
2853
2854	/* Equivalence Classes and Character Classes can't be a range
2855	start/end. */
2856	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2857	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2858	0))
2859	return REG_ERANGE;
2860
2861	start_collseq = lookup_collation_sequence_value (start_elem);
2862	end_collseq = lookup_collation_sequence_value (end_elem);
2863	/* Check start/end collation sequence values. */
2864	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, 0))
2865	return REG_ECOLLATE;
2866	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2867	return REG_ERANGE;
2868
2869	/* Got valid collation sequence values, add them as a new entry.
2870	However, if we have no collation elements, and the character set
2871	is single byte, the single byte character set that we
2872	build below suffices. */
2873	if (nrules > 0 \|\| dfa->mb_cur_max > 1)
2874	{
2875	/* Check the space of the arrays. */
2876	if (BE (*range_alloc == mbcset->nranges, 0))
2877	{
2878	/* There is not enough space, need realloc. */
2879	uint32_t *new_array_start;
2880	uint32_t *new_array_end;
2881	int new_nranges;
2882
2883	/* +1 in case of mbcset->nranges is 0. */
2884	new_nranges = 2 * mbcset->nranges + 1;
2885	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2886	new_nranges);
2887	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2888	new_nranges);
2889
2890	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2891	return REG_ESPACE;
2892
2893	mbcset->range_starts = new_array_start;
2894	mbcset->range_ends = new_array_end;
2895	*range_alloc = new_nranges;
2896	}
2897
2898	mbcset->range_starts[mbcset->nranges] = start_collseq;
2899	mbcset->range_ends[mbcset->nranges++] = end_collseq;
2900	}
2901
2902	/* Build the table for single byte characters. */
2903	for (ch = 0; ch < SBC_MAX; ch++)
2904	{
2905	uint32_t ch_collseq;
2906	/*
2907	if (MB_CUR_MAX == 1)
2908	*/
2909	if (nrules == 0)
2910	ch_collseq = collseqmb[ch];
2911	else
2912	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2913	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2914	bitset_set (sbcset, ch);
2915	}
2916	return REG_NOERROR;
2917	}
2918
2919	/* Local function for parse_bracket_exp used in _LIBC environement.
2920	Build the collating element which is represented by NAME.
2921	The result are written to MBCSET and SBCSET.
2922	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2923	pointer argument sinse we may update it. */
2924
2925	auto inline reg_errcode_t
2926	__attribute ((always_inline))
2927	build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2928	re_charset_t *mbcset;
2929	int *coll_sym_alloc;
2930	bitset_t sbcset;
2931	const unsigned char *name;
2932	{
2933	int32_t elem, idx;
2934	size_t name_len = strlen ((const char *) name);
2935	if (nrules != 0)
2936	{
2937	elem = seek_collating_symbol_entry (name, name_len);
2938	if (symb_table[2 * elem] != 0)
2939	{
2940	/* We found the entry. */
2941	idx = symb_table[2 * elem + 1];
2942	/* Skip the name of collating element name. */
2943	idx += 1 + extra[idx];
2944	}
2945	else if (symb_table[2 * elem] == 0 && name_len == 1)
2946	{
2947	/* No valid character, treat it as a normal
2948	character. */
2949	bitset_set (sbcset, name[0]);
2950	return REG_NOERROR;
2951	}
2952	else
2953	return REG_ECOLLATE;
2954
2955	/* Got valid collation sequence, add it as a new entry. */
2956	/* Check the space of the arrays. */
2957	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
2958	{
2959	/* Not enough, realloc it. */
2960	/* +1 in case of mbcset->ncoll_syms is 0. */
2961	int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
2962	/* Use realloc since mbcset->coll_syms is NULL
2963	if alloc == 0. /
2964	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2965	new_coll_sym_alloc);
2966	if (BE (new_coll_syms == NULL, 0))
2967	return REG_ESPACE;
2968	mbcset->coll_syms = new_coll_syms;
2969	*coll_sym_alloc = new_coll_sym_alloc;
2970	}
2971	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
2972	return REG_NOERROR;
2973	}
2974	else
2975	{
2976	if (BE (name_len != 1, 0))
2977	return REG_ECOLLATE;
2978	else
2979	{
2980	bitset_set (sbcset, name[0]);
2981	return REG_NOERROR;
2982	}
2983	}
2984	}
2985	#endif
2986
2987	re_token_t br_token;
2988	re_bitset_ptr_t sbcset;
2989	#ifdef RE_ENABLE_I18N
2990	re_charset_t *mbcset;
2991	int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
2992	int equiv_class_alloc = 0, char_class_alloc = 0;
2993	#endif /* not RE_ENABLE_I18N */
2994	int non_match = 0;
2995	bin_tree_t *work_tree;
2996	int token_len;
2997	int first_round = 1;
2998	#ifdef _LIBC
2999	collseqmb = (const unsigned char *)
3000	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3001	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3002	if (nrules)
3003	{
3004	/*
3005	if (MB_CUR_MAX > 1)
3006	*/
3007	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3008	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3009	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3010	_NL_COLLATE_SYMB_TABLEMB);
3011	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3012	_NL_COLLATE_SYMB_EXTRAMB);
3013	}
3014	#endif
3015	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3016	#ifdef RE_ENABLE_I18N
3017	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3018	#endif /* RE_ENABLE_I18N */
3019	#ifdef RE_ENABLE_I18N
3020	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3021	#else
3022	if (BE (sbcset == NULL, 0))
3023	#endif /* RE_ENABLE_I18N */
3024	{
3025	*err = REG_ESPACE;
3026	return NULL;
3027	}
3028
3029	token_len = peek_token_bracket (token, regexp, syntax);
3030	if (BE (token->type == END_OF_RE, 0))
3031	{
3032	*err = REG_BADPAT;
3033	goto parse_bracket_exp_free_return;
3034	}
3035	if (token->type == OP_NON_MATCH_LIST)
3036	{
3037	#ifdef RE_ENABLE_I18N
3038	mbcset->non_match = 1;
3039	#endif /* not RE_ENABLE_I18N */
3040	non_match = 1;
3041	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3042	bitset_set (sbcset, '\0');
3043	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3044	token_len = peek_token_bracket (token, regexp, syntax);
3045	if (BE (token->type == END_OF_RE, 0))
3046	{
3047	*err = REG_BADPAT;
3048	goto parse_bracket_exp_free_return;
3049	}
3050	}
3051
3052	/* We treat the first ']' as a normal character. */
3053	if (token->type == OP_CLOSE_BRACKET)
3054	token->type = CHARACTER;
3055
3056	while (1)
3057	{
3058	bracket_elem_t start_elem, end_elem;
3059	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3060	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3061	reg_errcode_t ret;
3062	int token_len2 = 0, is_range_exp = 0;
3063	re_token_t token2;
3064
3065	start_elem.opr.name = start_name_buf;
3066	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3067	syntax, first_round);
3068	if (BE (ret != REG_NOERROR, 0))
3069	{
3070	*err = ret;
3071	goto parse_bracket_exp_free_return;
3072	}
3073	first_round = 0;
3074
3075	/* Get information about the next token. We need it in any case. */
3076	token_len = peek_token_bracket (token, regexp, syntax);
3077
3078	/* Do not check for ranges if we know they are not allowed. */
3079	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3080	{
3081	if (BE (token->type == END_OF_RE, 0))
3082	{
3083	*err = REG_EBRACK;
3084	goto parse_bracket_exp_free_return;
3085	}
3086	if (token->type == OP_CHARSET_RANGE)
3087	{
3088	re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3089	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3090	if (BE (token2.type == END_OF_RE, 0))
3091	{
3092	*err = REG_EBRACK;
3093	goto parse_bracket_exp_free_return;
3094	}
3095	if (token2.type == OP_CLOSE_BRACKET)
3096	{
3097	/* We treat the last '-' as a normal character. */
3098	re_string_skip_bytes (regexp, -token_len);
3099	token->type = CHARACTER;
3100	}
3101	else
3102	is_range_exp = 1;
3103	}
3104	}
3105
3106	if (is_range_exp == 1)
3107	{
3108	end_elem.opr.name = end_name_buf;
3109	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3110	dfa, syntax, 1);
3111	if (BE (ret != REG_NOERROR, 0))
3112	{
3113	*err = ret;
3114	goto parse_bracket_exp_free_return;
3115	}
3116
3117	token_len = peek_token_bracket (token, regexp, syntax);
3118
3119	#ifdef _LIBC
3120	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3121	&start_elem, &end_elem);
3122	#else
3123	# ifdef RE_ENABLE_I18N
3124	*err = build_range_exp (sbcset,
3125	dfa->mb_cur_max > 1 ? mbcset : NULL,
3126	&range_alloc, &start_elem, &end_elem);
3127	# else
3128	*err = build_range_exp (sbcset, &start_elem, &end_elem);
3129	# endif
3130	#endif /* RE_ENABLE_I18N */
3131	if (BE (*err != REG_NOERROR, 0))
3132	goto parse_bracket_exp_free_return;
3133	}
3134	else
3135	{
3136	switch (start_elem.type)
3137	{
3138	case SB_CHAR:
3139	bitset_set (sbcset, start_elem.opr.ch);
3140	break;
3141	#ifdef RE_ENABLE_I18N
3142	case MB_CHAR:
3143	/* Check whether the array has enough space. */
3144	if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3145	{
3146	wchar_t *new_mbchars;
3147	/* Not enough, realloc it. */
3148	/* +1 in case of mbcset->nmbchars is 0. */
3149	mbchar_alloc = 2 * mbcset->nmbchars + 1;
3150	/* Use realloc since array is NULL if alloc == 0. /
3151	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3152	mbchar_alloc);
3153	if (BE (new_mbchars == NULL, 0))
3154	goto parse_bracket_exp_espace;
3155	mbcset->mbchars = new_mbchars;
3156	}
3157	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3158	break;
3159	#endif /* RE_ENABLE_I18N */
3160	case EQUIV_CLASS:
3161	*err = build_equiv_class (sbcset,
3162	#ifdef RE_ENABLE_I18N
3163	mbcset, &equiv_class_alloc,
3164	#endif /* RE_ENABLE_I18N */
3165	start_elem.opr.name);
3166	if (BE (*err != REG_NOERROR, 0))
3167	goto parse_bracket_exp_free_return;
3168	break;
3169	case COLL_SYM:
3170	*err = build_collating_symbol (sbcset,
3171	#ifdef RE_ENABLE_I18N
3172	mbcset, &coll_sym_alloc,
3173	#endif /* RE_ENABLE_I18N */
3174	start_elem.opr.name);
3175	if (BE (*err != REG_NOERROR, 0))
3176	goto parse_bracket_exp_free_return;
3177	break;
3178	case CHAR_CLASS:
3179	*err = build_charclass (regexp->trans, sbcset,
3180	#ifdef RE_ENABLE_I18N
3181	mbcset, &char_class_alloc,
3182	#endif /* RE_ENABLE_I18N */
3183	start_elem.opr.name, syntax);
3184	if (BE (*err != REG_NOERROR, 0))
3185	goto parse_bracket_exp_free_return;
3186	break;
3187	default:
3188	assert (0);
3189	break;
3190	}
3191	}
3192	if (BE (token->type == END_OF_RE, 0))
3193	{
3194	*err = REG_EBRACK;
3195	goto parse_bracket_exp_free_return;
3196	}
3197	if (token->type == OP_CLOSE_BRACKET)
3198	break;
3199	}
3200
3201	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3202
3203	/* If it is non-matching list. */
3204	if (non_match)
3205	bitset_not (sbcset);
3206
3207	#ifdef RE_ENABLE_I18N
3208	/* Ensure only single byte characters are set. */
3209	if (dfa->mb_cur_max > 1)
3210	bitset_mask (sbcset, dfa->sb_char);
3211
3212	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
3213	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3214	\|\| mbcset->non_match)))
3215	{
3216	bin_tree_t *mbc_tree;
3217	int sbc_idx;
3218	/* Build a tree for complex bracket. */
3219	dfa->has_mb_node = 1;
3220	br_token.type = COMPLEX_BRACKET;
3221	br_token.opr.mbcset = mbcset;
3222	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3223	if (BE (mbc_tree == NULL, 0))
3224	goto parse_bracket_exp_espace;
3225	for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3226	if (sbcset[sbc_idx])
3227	break;
3228	/* If there are no bits set in sbcset, there is no point
3229	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3230	if (sbc_idx < BITSET_WORDS)
3231	{
3232	/* Build a tree for simple bracket. */
3233	br_token.type = SIMPLE_BRACKET;
3234	br_token.opr.sbcset = sbcset;
3235	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3236	if (BE (work_tree == NULL, 0))
3237	goto parse_bracket_exp_espace;
3238
3239	/* Then join them by ALT node. */
3240	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3241	if (BE (work_tree == NULL, 0))
3242	goto parse_bracket_exp_espace;
3243	}
3244	else
3245	{
3246	re_free (sbcset);
3247	work_tree = mbc_tree;
3248	}
3249	}
3250	else
3251	#endif /* not RE_ENABLE_I18N */
3252	{
3253	#ifdef RE_ENABLE_I18N
3254	free_charset (mbcset);
3255	#endif
3256	/* Build a tree for simple bracket. */
3257	br_token.type = SIMPLE_BRACKET;
3258	br_token.opr.sbcset = sbcset;
3259	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3260	if (BE (work_tree == NULL, 0))
3261	goto parse_bracket_exp_espace;
3262	}
3263	return work_tree;
3264
3265	parse_bracket_exp_espace:
3266	*err = REG_ESPACE;
3267	parse_bracket_exp_free_return:
3268	re_free (sbcset);
3269	#ifdef RE_ENABLE_I18N
3270	free_charset (mbcset);
3271	#endif /* RE_ENABLE_I18N */
3272	return NULL;
3273	}
3274
3275	/* Parse an element in the bracket expression. */
3276
3277	static reg_errcode_t
3278	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
3279	re_token_t token, int token_len, re_dfa_t dfa,
3280	reg_syntax_t syntax, int accept_hyphen)
3281	{
3282	#ifdef RE_ENABLE_I18N
3283	int cur_char_size;
3284	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3285	if (cur_char_size > 1)
3286	{
3287	elem->type = MB_CHAR;
3288	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3289	re_string_skip_bytes (regexp, cur_char_size);
3290	return REG_NOERROR;
3291	}
3292	#endif /* RE_ENABLE_I18N */
3293	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3294	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
3295	\|\| token->type == OP_OPEN_EQUIV_CLASS)
3296	return parse_bracket_symbol (elem, regexp, token);
3297	if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3298	{
3299	/* A '-' must only appear as anything but a range indicator before
3300	the closing bracket. Everything else is an error. */
3301	re_token_t token2;
3302	(void) peek_token_bracket (&token2, regexp, syntax);
3303	if (token2.type != OP_CLOSE_BRACKET)
3304	/* The actual error value is not standardized since this whole
3305	case is undefined. But ERANGE makes good sense. */
3306	return REG_ERANGE;
3307	}
3308	elem->type = SB_CHAR;
3309	elem->opr.ch = token->opr.c;
3310	return REG_NOERROR;
3311	}
3312
3313	/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3314	such as [:<character_class>:], [.<collating_element>.], and
3315	[=<equivalent_class>=]. */
3316
3317	static reg_errcode_t
3318	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
3319	re_token_t *token)
3320	{
3321	unsigned char ch, delim = token->opr.c;
3322	int i = 0;
3323	if (re_string_eoi(regexp))
3324	return REG_EBRACK;
3325	for (;; ++i)
3326	{
3327	if (i >= BRACKET_NAME_BUF_SIZE)
3328	return REG_EBRACK;
3329	if (token->type == OP_OPEN_CHAR_CLASS)
3330	ch = re_string_fetch_byte_case (regexp);
3331	else
3332	ch = re_string_fetch_byte (regexp);
3333	if (re_string_eoi(regexp))
3334	return REG_EBRACK;
3335	if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3336	break;
3337	elem->opr.name[i] = ch;
3338	}
3339	re_string_skip_bytes (regexp, 1);
3340	elem->opr.name[i] = '\0';
3341	switch (token->type)
3342	{
3343	case OP_OPEN_COLL_ELEM:
3344	elem->type = COLL_SYM;
3345	break;
3346	case OP_OPEN_EQUIV_CLASS:
3347	elem->type = EQUIV_CLASS;
3348	break;
3349	case OP_OPEN_CHAR_CLASS:
3350	elem->type = CHAR_CLASS;
3351	break;
3352	default:
3353	break;
3354	}
3355	return REG_NOERROR;
3356	}
3357
3358	/* Helper function for parse_bracket_exp.
3359	Build the equivalence class which is represented by NAME.
3360	The result are written to MBCSET and SBCSET.
3361	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3362	is a pointer argument sinse we may update it. */
3363
3364	static reg_errcode_t
3365	#ifdef RE_ENABLE_I18N
3366	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3367	int equiv_class_alloc, const unsigned char name)
3368	#else /* not RE_ENABLE_I18N */
3369	build_equiv_class (bitset_t sbcset, const unsigned char *name)
3370	#endif /* not RE_ENABLE_I18N */
3371	{
3372	#ifdef _LIBC
3373	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3374	if (nrules != 0)
3375	{
3376	const int32_t table, indirect;
3377	const unsigned char weights, extra, *cp;
3378	unsigned char char_buf[2];
3379	int32_t idx1, idx2;
3380	unsigned int ch;
3381	size_t len;
3382	/* This #include defines a local function! */
3383	# include <locale/weight.h>
3384	/* Calculate the index for equivalence class. */
3385	cp = name;
3386	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3387	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3388	_NL_COLLATE_WEIGHTMB);
3389	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3390	_NL_COLLATE_EXTRAMB);
3391	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3392	_NL_COLLATE_INDIRECTMB);
3393	idx1 = findidx (&cp);
3394	if (BE (idx1 == 0 \|\| cp < name + strlen ((const char *) name), 0))
3395	/* This isn't a valid character. */
3396	return REG_ECOLLATE;
3397
3398	/* Build single byte matcing table for this equivalence class. */
3399	char_buf[1] = (unsigned char) '\0';
3400	len = weights[idx1];
3401	for (ch = 0; ch < SBC_MAX; ++ch)
3402	{
3403	char_buf[0] = ch;
3404	cp = char_buf;
3405	idx2 = findidx (&cp);
3406	/*
3407	idx2 = table[ch];
3408	*/
3409	if (idx2 == 0)
3410	/* This isn't a valid character. */
3411	continue;
3412	if (len == weights[idx2])
3413	{
3414	int cnt = 0;
3415	while (cnt <= len &&
3416	weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
3417	++cnt;
3418
3419	if (cnt > len)
3420	bitset_set (sbcset, ch);
3421	}
3422	}
3423	/* Check whether the array has enough space. */
3424	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3425	{
3426	/* Not enough, realloc it. */
3427	/* +1 in case of mbcset->nequiv_classes is 0. */
3428	int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3429	/* Use realloc since the array is NULL if alloc == 0. /
3430	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3431	int32_t,
3432	new_equiv_class_alloc);
3433	if (BE (new_equiv_classes == NULL, 0))
3434	return REG_ESPACE;
3435	mbcset->equiv_classes = new_equiv_classes;
3436	*equiv_class_alloc = new_equiv_class_alloc;
3437	}
3438	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3439	}
3440	else
3441	#endif /* _LIBC */
3442	{
3443	if (BE (strlen ((const char *) name) != 1, 0))
3444	return REG_ECOLLATE;
3445	bitset_set (sbcset, *name);
3446	}
3447	return REG_NOERROR;
3448	}
3449
3450	/* Helper function for parse_bracket_exp.
3451	Build the character class which is represented by NAME.
3452	The result are written to MBCSET and SBCSET.
3453	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3454	is a pointer argument sinse we may update it. */
3455
3456	static reg_errcode_t
3457	#ifdef RE_ENABLE_I18N
3458	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3459	re_charset_t mbcset, int char_class_alloc,
3460	const unsigned char *class_name, reg_syntax_t syntax)
3461	#else /* not RE_ENABLE_I18N */
3462	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3463	const unsigned char *class_name, reg_syntax_t syntax)
3464	#endif /* not RE_ENABLE_I18N */
3465	{
3466	int i;
3467	const char name = (const char ) class_name;
3468
3469	/* In case of REG_ICASE "upper" and "lower" match the both of
3470	upper and lower cases. */
3471	if ((syntax & RE_ICASE)
3472	&& (strcmp (name, "upper") == 0 \|\| strcmp (name, "lower") == 0))
3473	name = "alpha";
3474
3475	#ifdef RE_ENABLE_I18N
3476	/* Check the space of the arrays. */
3477	if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3478	{
3479	/* Not enough, realloc it. */
3480	/* +1 in case of mbcset->nchar_classes is 0. */
3481	int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3482	/* Use realloc since array is NULL if alloc == 0. /
3483	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3484	new_char_class_alloc);
3485	if (BE (new_char_classes == NULL, 0))
3486	return REG_ESPACE;
3487	mbcset->char_classes = new_char_classes;
3488	*char_class_alloc = new_char_class_alloc;
3489	}
3490	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3491	#endif /* RE_ENABLE_I18N */
3492
3493	#define BUILD_CHARCLASS_LOOP(ctype_func) \
3494	do { \
3495	if (BE (trans != NULL, 0)) \
3496	{ \
3497	for (i = 0; i < SBC_MAX; ++i) \
3498	if (ctype_func (i)) \
3499	bitset_set (sbcset, trans[i]); \
3500	} \
3501	else \
3502	{ \
3503	for (i = 0; i < SBC_MAX; ++i) \
3504	if (ctype_func (i)) \
3505	bitset_set (sbcset, i); \
3506	} \
3507	} while (0)
3508
3509	if (strcmp (name, "alnum") == 0)
3510	BUILD_CHARCLASS_LOOP (isalnum);
3511	else if (strcmp (name, "cntrl") == 0)
3512	BUILD_CHARCLASS_LOOP (iscntrl);
3513	else if (strcmp (name, "lower") == 0)
3514	BUILD_CHARCLASS_LOOP (islower);
3515	else if (strcmp (name, "space") == 0)
3516	BUILD_CHARCLASS_LOOP (isspace);
3517	else if (strcmp (name, "alpha") == 0)
3518	BUILD_CHARCLASS_LOOP (isalpha);
3519	else if (strcmp (name, "digit") == 0)
3520	BUILD_CHARCLASS_LOOP (isdigit);
3521	else if (strcmp (name, "print") == 0)
3522	BUILD_CHARCLASS_LOOP (isprint);
3523	else if (strcmp (name, "upper") == 0)
3524	BUILD_CHARCLASS_LOOP (isupper);
3525	else if (strcmp (name, "blank") == 0)
3526	BUILD_CHARCLASS_LOOP (isblank);
3527	else if (strcmp (name, "graph") == 0)
3528	BUILD_CHARCLASS_LOOP (isgraph);
3529	else if (strcmp (name, "punct") == 0)
3530	BUILD_CHARCLASS_LOOP (ispunct);
3531	else if (strcmp (name, "xdigit") == 0)
3532	BUILD_CHARCLASS_LOOP (isxdigit);
3533	else
3534	return REG_ECTYPE;
3535
3536	return REG_NOERROR;
3537	}
3538
3539	static bin_tree_t *
3540	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3541	const unsigned char *class_name,
3542	const unsigned char *extra, int non_match,
3543	reg_errcode_t *err)
3544	{
3545	re_bitset_ptr_t sbcset;
3546	#ifdef RE_ENABLE_I18N
3547	re_charset_t *mbcset;
3548	int alloc = 0;
3549	#endif /* not RE_ENABLE_I18N */
3550	reg_errcode_t ret;
3551	re_token_t br_token;
3552	bin_tree_t *tree;
3553
3554	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3555	#ifdef RE_ENABLE_I18N
3556	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3557	#endif /* RE_ENABLE_I18N */
3558
3559	#ifdef RE_ENABLE_I18N
3560	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3561	#else /* not RE_ENABLE_I18N */
3562	if (BE (sbcset == NULL, 0))
3563	#endif /* not RE_ENABLE_I18N */
3564	{
3565	*err = REG_ESPACE;
3566	return NULL;
3567	}
3568
3569	if (non_match)
3570	{
3571	#ifdef RE_ENABLE_I18N
3572	/*
3573	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3574	bitset_set(cset->sbcset, '\0');
3575	*/
3576	mbcset->non_match = 1;
3577	#endif /* not RE_ENABLE_I18N */
3578	}
3579
3580	/* We don't care the syntax in this case. */
3581	ret = build_charclass (trans, sbcset,
3582	#ifdef RE_ENABLE_I18N
3583	mbcset, &alloc,
3584	#endif /* RE_ENABLE_I18N */
3585	class_name, 0);
3586
3587	if (BE (ret != REG_NOERROR, 0))
3588	{
3589	re_free (sbcset);
3590	#ifdef RE_ENABLE_I18N
3591	free_charset (mbcset);
3592	#endif /* RE_ENABLE_I18N */
3593	*err = ret;
3594	return NULL;
3595	}
3596	/* \w match '_' also. */
3597	for (; *extra; extra++)
3598	bitset_set (sbcset, *extra);
3599
3600	/* If it is non-matching list. */
3601	if (non_match)
3602	bitset_not (sbcset);
3603
3604	#ifdef RE_ENABLE_I18N
3605	/* Ensure only single byte characters are set. */
3606	if (dfa->mb_cur_max > 1)
3607	bitset_mask (sbcset, dfa->sb_char);
3608	#endif
3609
3610	/* Build a tree for simple bracket. */
3611	br_token.type = SIMPLE_BRACKET;
3612	br_token.opr.sbcset = sbcset;
3613	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3614	if (BE (tree == NULL, 0))
3615	goto build_word_op_espace;
3616
3617	#ifdef RE_ENABLE_I18N
3618	if (dfa->mb_cur_max > 1)
3619	{
3620	bin_tree_t *mbc_tree;
3621	/* Build a tree for complex bracket. */
3622	br_token.type = COMPLEX_BRACKET;
3623	br_token.opr.mbcset = mbcset;
3624	dfa->has_mb_node = 1;
3625	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3626	if (BE (mbc_tree == NULL, 0))
3627	goto build_word_op_espace;
3628	/* Then join them by ALT node. */
3629	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3630	if (BE (mbc_tree != NULL, 1))
3631	return tree;
3632	}
3633	else
3634	{
3635	free_charset (mbcset);
3636	return tree;
3637	}
3638	#else /* not RE_ENABLE_I18N */
3639	return tree;
3640	#endif /* not RE_ENABLE_I18N */
3641
3642	build_word_op_espace:
3643	re_free (sbcset);
3644	#ifdef RE_ENABLE_I18N
3645	free_charset (mbcset);
3646	#endif /* RE_ENABLE_I18N */
3647	*err = REG_ESPACE;
3648	return NULL;
3649	}
3650
3651	/* This is intended for the expressions like "a{1,3}".
3652	Fetch a number from `input', and return the number.
3653	Return -1, if the number field is empty like "{,1}".
3654	Return -2, If an error is occured. */
3655
3656	static int
3657	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
3658	{
3659	int num = -1;
3660	unsigned char c;
3661	while (1)
3662	{
3663	fetch_token (token, input, syntax);
3664	c = token->opr.c;
3665	if (BE (token->type == END_OF_RE, 0))
3666	return -2;
3667	if (token->type == OP_CLOSE_DUP_NUM \|\| c == ',')
3668	break;
3669	num = ((token->type != CHARACTER \|\| c < '0' \|\| '9' < c \|\| num == -2)
3670	? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3671	num = (num > RE_DUP_MAX) ? -2 : num;
3672	}
3673	return num;
3674	}
3675
3676
3677	#ifdef RE_ENABLE_I18N
3678	static void
3679	free_charset (re_charset_t *cset)
3680	{
3681	re_free (cset->mbchars);
3682	# ifdef _LIBC
3683	re_free (cset->coll_syms);
3684	re_free (cset->equiv_classes);
3685	re_free (cset->range_starts);
3686	re_free (cset->range_ends);
3687	# endif
3688	re_free (cset->char_classes);
3689	re_free (cset);
3690	}
3691	#endif /* RE_ENABLE_I18N */
3692
3693
3694	/* Functions for binary tree operation. */
3695
3696	/* Create a tree node. */
3697
3698	static bin_tree_t *
3699	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3700	re_token_type_t type)
3701	{
3702	re_token_t t;
3703	t.type = type;
3704	return create_token_tree (dfa, left, right, &t);
3705	}
3706
3707	static bin_tree_t *
3708	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3709	const re_token_t *token)
3710	{
3711	bin_tree_t *tree;
3712	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3713	{
3714	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3715
3716	if (storage == NULL)
3717	return NULL;
3718	storage->next = dfa->str_tree_storage;
3719	dfa->str_tree_storage = storage;
3720	dfa->str_tree_storage_idx = 0;
3721	}
3722	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3723
3724	tree->parent = NULL;
3725	tree->left = left;
3726	tree->right = right;
3727	tree->token = *token;
3728	tree->token.duplicated = 0;
3729	tree->token.opt_subexp = 0;
3730	tree->first = NULL;
3731	tree->next = NULL;
3732	tree->node_idx = -1;
3733
3734	if (left != NULL)
3735	left->parent = tree;
3736	if (right != NULL)
3737	right->parent = tree;
3738	return tree;
3739	}
3740
3741	/* Mark the tree SRC as an optional subexpression.
3742	To be called from preorder or postorder. */
3743
3744	static reg_errcode_t
3745	mark_opt_subexp (void extra, bin_tree_t node)
3746	{
3747	int idx = (int) (long) extra;
3748	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3749	node->token.opt_subexp = 1;
3750
3751	return REG_NOERROR;
3752	}
3753
3754	/* Free the allocated memory inside NODE. */
3755
3756	static void
3757	free_token (re_token_t *node)
3758	{
3759	#ifdef RE_ENABLE_I18N
3760	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3761	free_charset (node->opr.mbcset);
3762	else
3763	#endif /* RE_ENABLE_I18N */
3764	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3765	re_free (node->opr.sbcset);
3766	}
3767
3768	/* Worker function for tree walking. Free the allocated memory inside NODE
3769	and its children. */
3770
3771	static reg_errcode_t
3772	free_tree (void extra, bin_tree_t node)
3773	{
3774	free_token (&node->token);
3775	return REG_NOERROR;
3776	}
3777
3778
3779	/* Duplicate the node SRC, and return new node. This is a preorder
3780	visit similar to the one implemented by the generic visitor, but
3781	we need more infrastructure to maintain two parallel trees --- so,
3782	it's easier to duplicate. */
3783
3784	static bin_tree_t *
3785	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
3786	{
3787	const bin_tree_t *node;
3788	bin_tree_t *dup_root;
3789	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
3790
3791	for (node = root; ; )
3792	{
3793	/* Create a new tree and link it back to the current parent. */
3794	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3795	if (*p_new == NULL)
3796	return NULL;
3797	(*p_new)->parent = dup_node;
3798	(*p_new)->token.duplicated = 1;
3799	dup_node = *p_new;
3800
3801	/* Go to the left node, or up and to the right. */
3802	if (node->left)
3803	{
3804	node = node->left;
3805	p_new = &dup_node->left;
3806	}
3807	else
3808	{
3809	const bin_tree_t *prev = NULL;
3810	while (node->right == prev \|\| node->right == NULL)
3811	{
3812	prev = node;
3813	node = node->parent;
3814	dup_node = dup_node->parent;
3815	if (!node)
3816	return dup_root;
3817	}
3818	node = node->right;
3819	p_new = &dup_node->right;
3820	}
3821	}
3822	}

Note: See TracBrowser for help on using the repository browser.

source: kBuild/trunk/src/sed/lib/regcomp.c@ 3550

Download in other formats: