/* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988, 1998, 2007, 2009-2022 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc.,
   51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */
|
---|
18 |
|
---|
/* Written June, 1988 by Mike Haertel */
|
---|
20 |
|
---|
21 | #ifndef DFA_H_
|
---|
22 | #define DFA_H_
|
---|
23 |
|
---|
24 | #include "idx.h"
|
---|
25 | #include <regex.h>
|
---|
26 | #include <stddef.h>
|
---|
27 | #include <stdlib.h>
|
---|
28 |
|
---|
29 | #ifdef __cplusplus
|
---|
30 | extern "C" {
|
---|
31 | #endif
|
---|
32 |
|
---|
/* Forward declaration only: this header merely passes pointers to
   struct localeinfo.  See localeinfo.h.  */
struct localeinfo;
|
---|
34 |
|
---|
35 | /* Element of a list of strings, at least one of which is known to
|
---|
36 | appear in any R.E. matching the DFA. */
|
---|
37 | struct dfamust
|
---|
38 | {
|
---|
39 | bool exact;
|
---|
40 | bool begline;
|
---|
41 | bool endline;
|
---|
42 | char must[FLEXIBLE_ARRAY_MEMBER];
|
---|
43 | };
|
---|
44 |
|
---|
/* The dfa structure.  It is completely opaque: only this incomplete
   declaration is provided here, so callers handle it solely through
   pointers passed to the functions declared in this header.  */
struct dfa;
|
---|
47 |
|
---|
/* Needed when Gnulib is not used: make the attribute macros that
   decorate the declarations in this header expand to nothing.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_DEALLOC(f, i)
# define _GL_ATTRIBUTE_DEALLOC_FREE
# define _GL_ATTRIBUTE_RETURNS_NONNULL
#endif

/* _GL_ATTRIBUTE_PURE is also used by declarations in this header.
   It is guarded separately so that an existing definition is never
   redefined.  */
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif
|
---|
55 |
|
---|
56 | /* Entry points. */
|
---|
57 |
|
---|
58 | /* Allocate a struct dfa. The struct dfa is completely opaque.
|
---|
59 | It should be initialized via dfasyntax or dfacopysyntax before other use.
|
---|
60 | The returned pointer should be passed directly to free() after
|
---|
61 | calling dfafree() on it. */
|
---|
62 | extern struct dfa *dfaalloc (void)
|
---|
63 | _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE
|
---|
64 | _GL_ATTRIBUTE_RETURNS_NONNULL;
|
---|
65 |
|
---|
/* DFA options that can be ORed together, for dfasyntax's 4th arg.
   Each option occupies a distinct bit.  */
enum
  {
    /* ^ and $ match only the start and end of data, and do not match
       end-of-line within data.  This is always false for grep, but
       possibly true for other apps.  */
    DFA_ANCHOR = 1 << 0,

    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
    DFA_EOL_NUL = 1 << 1,

    /* Treat [:alpha:] etc. as an error at the top level, instead of
       merely a warning.  */
    DFA_CONFUSING_BRACKETS_ERROR = 1 << 2,

    /* Warn about stray backslashes before ordinary characters other
       than ] and } which are special because even though POSIX
       says \] and \} have undefined interpretation, platforms
       reliably ignore those stray backslashes and warning about them
       would likely cause more trouble than it's worth.  */
    DFA_STRAY_BACKSLASH_WARN = 1 << 3,

    /* Warn about * appearing out of context at the start of an
       expression or subexpression.  */
    DFA_STAR_WARN = 1 << 4,

    /* Warn about +, ?, {...} appearing out of context at the start of
       an expression or subexpression.  */
    DFA_PLUS_WARN = 1 << 5,
  };
|
---|
96 |
|
---|
/* Initialize or reinitialize a DFA.  The arguments are:
     D           - the DFA to operate on.
     LINFO       - information about the current locale.
     SYNTAX_BITS - syntax bits described in regex.h.
     DFAOPTS     - additional DFA options (the DFA_* enumeration
                   constants declared in this header), ORed together.  */
extern void dfasyntax (struct dfa *d, struct localeinfo const *linfo,
                       reg_syntax_t syntax_bits, int dfaopts);
|
---|
104 |
|
---|
/* Initialize or reinitialize the DFA TO from the already-initialized
   DFA FROM.  FROM is not modified.  */
extern void dfacopysyntax (struct dfa *to, struct dfa const *from);
|
---|
107 |
|
---|
108 | /* Parse the given string of given length into the given struct dfa. */
|
---|
109 | extern void dfaparse (char const *, idx_t, struct dfa *);
|
---|
110 |
|
---|
/* Declaration of the tag used by the prototype below; the full
   definition of struct dfamust appears earlier in this header.  */
struct dfamust;

/* Free the storage held by the components of the struct dfamust DM.  */
extern void dfamustfree (struct dfamust *dm);
|
---|
115 |
|
---|
116 | /* Allocate and return a struct dfamust from a struct dfa that was
|
---|
117 | initialized by dfaparse and not yet given to dfacomp. */
|
---|
118 | extern struct dfamust *dfamust (struct dfa const *)
|
---|
119 | _GL_ATTRIBUTE_DEALLOC (dfamustfree, 1);
|
---|
120 |
|
---|
121 | /* Compile the given string of the given length into the given struct dfa.
|
---|
122 | The last argument says whether to build a searching or an exact matcher.
|
---|
123 | A null first argument means the struct dfa has already been
|
---|
124 | initialized by dfaparse; the second argument is ignored. */
|
---|
125 | extern void dfacomp (char const *, idx_t, struct dfa *, bool);
|
---|
126 |
|
---|
127 | /* Search through a buffer looking for a match to the given struct dfa.
|
---|
128 | Find the first occurrence of a string matching the regexp in the
|
---|
129 | buffer, and the shortest possible version thereof. Return a pointer to
|
---|
130 | the first character after the match, or NULL if none is found. BEGIN
|
---|
131 | points to the beginning of the buffer, and END points to the first byte
|
---|
132 | after its end. Note however that we store a sentinel byte (usually
|
---|
133 | newline) in *END, so the actual buffer must be one byte longer.
|
---|
134 | When ALLOW_NL is true, newlines may appear in the matching string.
|
---|
135 | If COUNT is non-NULL, increment *COUNT once for each newline processed.
|
---|
136 | Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
|
---|
137 | encountered a back-reference. The caller can use this to decide
|
---|
138 | whether to fall back on a backtracking matcher. */
|
---|
139 | extern char *dfaexec (struct dfa *d, char const *begin, char *end,
|
---|
140 | bool allow_nl, idx_t *count, bool *backref);
|
---|
141 |
|
---|
142 | /* Return a superset for D. The superset matches everything that D
|
---|
143 | matches, along with some other strings (though the latter should be
|
---|
144 | rare, for efficiency reasons). Return a null pointer if no useful
|
---|
145 | superset is available. */
|
---|
146 | extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
|
---|
147 |
|
---|
148 | /* The DFA is likely to be fast. */
|
---|
149 | extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
---|
150 |
|
---|
151 | /* Return true if every construct in D is supported by this DFA matcher. */
|
---|
152 | extern bool dfasupported (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
---|
153 |
|
---|
/* Free the storage held by the components of the struct dfa D.
   As described at dfaalloc, a pointer obtained from dfaalloc is
   passed to free () separately, after this call.  */
extern void dfafree (struct dfa *d);
|
---|
156 |
|
---|
157 | /* Error handling. */
|
---|
158 |
|
---|
/* dfawarn () is called by the regexp routines whenever a regex is
   compiled that likely doesn't do what the user wanted.  It takes a
   single argument, a NUL-terminated string describing the situation.
   This header only declares it: the user must supply a dfawarn.  */
extern void dfawarn (char const *);
|
---|
164 |
|
---|
/* dfaerror () is called by the regexp routines whenever an error
   occurs.  It takes a single argument, a NUL-terminated string
   describing the error, and is declared as never returning.
   This header only declares it: the user must supply a dfaerror.  */
extern _Noreturn void dfaerror (char const *);
|
---|
169 |
|
---|
170 | #ifdef __cplusplus
|
---|
171 | }
|
---|
172 | #endif
|
---|
173 |
|
---|
174 | #endif /* dfa.h */
|
---|