VirtualBox

source: kBuild/vendor/grep/current/src/searchutils.c@ 3630

Last change on this file since 3630 was 3529, checked in by bird, 3 years ago

Imported grep 3.7 from grep-3.7.tar.gz (sha256: c22b0cf2d4f6bbe599c902387e8058990e1eee99aef333a203829e5fd3dbb342), applying minimal auto-props.

  • Property svn:eol-style set to native
File size: 5.4 KB
Line 
/* searchutils.c - helper subroutines for grep's matchers.
   Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA. */
18
19#include <config.h>
20
21#define SEARCH_INLINE _GL_EXTERN_INLINE
22#define SYSTEM_INLINE _GL_EXTERN_INLINE
23#include "search.h"
24
25/* For each byte B, sbwordchar[B] is true if B is a single-byte
26 character that is a word constituent, and is false otherwise. */
27static bool sbwordchar[NCHAR];
28
/* Report whether WC counts as a word constituent for -w matching:
   that is, whether it is an underscore or satisfies iswalnum.  */
static bool
wordchar (wint_t wc)
{
  if (wc == L'_')
    return true;
  return iswalnum (wc) != 0;
}
35
36void
37wordinit (void)
38{
39 for (int i = 0; i < NCHAR; i++)
40 sbwordchar[i] = wordchar (localeinfo.sbctowc[i]);
41}
42
43kwset_t
44kwsinit (bool mb_trans)
45{
46 char *trans = NULL;
47
48 if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
49 {
50 trans = xmalloc (NCHAR);
51 /* If I is a single-byte character that becomes a different
52 single-byte character when uppercased, set trans[I]
53 to that character. Otherwise, set trans[I] to I. */
54 for (int i = 0; i < NCHAR; i++)
55 trans[i] = toupper (i);
56 }
57
58 return kwsalloc (trans);
59}
60
61/* In the buffer *MB_START, return the number of bytes needed to go
62 back from CUR to the previous boundary, where a "boundary" is the
63 start of a multibyte character or is an error-encoding byte. The
64 buffer ends at END (i.e., one past the address of the buffer's last
65 byte). If CUR is already at a boundary, return 0. If CUR is no
66 larger than *MB_START, return CUR - *MB_START without modifying
67 *MB_START or *MBCLEN.
68
69 When returning zero, set *MB_START to CUR. When returning a
70 positive value, set *MB_START to the next boundary after CUR,
71 or to END if there is no such boundary, and set *MBCLEN to the
72 length of the preceding character. */
73ptrdiff_t
74mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
75 char const *end)
76{
77 const char *p = *mb_start;
78 const char *p0 = p;
79 size_t clen;
80
81 if (cur <= p)
82 return cur - p;
83
84 if (localeinfo.using_utf8)
85 {
86 p = cur;
87 clen = 1;
88
89 if (cur < end && (*cur & 0xc0) == 0x80)
90 for (int i = 1; i <= 3; i++)
91 if ((cur[-i] & 0xc0) != 0x80)
92 {
93 mbstate_t mbs = { 0 };
94 clen = mb_clen (cur - i, end - (cur - i), &mbs);
95 if (i < clen && clen < (size_t) -2)
96 {
97 p0 = cur - i;
98 p = p0 + clen;
99 }
100 break;
101 }
102 }
103 else
104 {
105 mbstate_t mbs = { 0 };
106 do
107 {
108 clen = mb_clen (p, end - p, &mbs);
109
110 if ((size_t) -2 <= clen)
111 {
112 /* An invalid sequence, or a truncated multibyte character.
113 Treat it as a single byte character. */
114 clen = 1;
115 memset (&mbs, 0, sizeof mbs);
116 }
117 p0 = p;
118 p += clen;
119 }
120 while (p < cur);
121 }
122
123 *mb_start = p;
124 if (mbclen)
125 *mbclen = clen;
126 return p == cur ? 0 : cur - p0;
127}
128
129/* Examine the start of BUF (which goes to END) for word constituents.
130 If COUNTALL, examine as many as possible; otherwise, examine at most one.
131 Return the total number of bytes in the examined characters. */
132static size_t
133wordchars_count (char const *buf, char const *end, bool countall)
134{
135 size_t n = 0;
136 mbstate_t mbs = { 0 };
137 while (n < end - buf)
138 {
139 unsigned char b = buf[n];
140 if (sbwordchar[b])
141 n++;
142 else if (localeinfo.sbclen[b] != -2)
143 break;
144 else
145 {
146 wchar_t wc = 0;
147 size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
148 if (!wordchar (wc))
149 break;
150 n += wcbytes + !wcbytes;
151 }
152 if (!countall)
153 break;
154 }
155 return n;
156}
157
/* Return the byte length of the longest prefix of BUF made up only
   of word constituents.  END is one past the buffer's last byte.  */
size_t
wordchars_size (char const *buf, char const *end)
{
  bool const countall = true;
  return wordchars_count (buf, end, countall);
}
166
/* Return the byte length of the character at the start of BUF if it
   is a word constituent, and zero if it is not.  END is one past the
   buffer's last byte.  */
size_t
wordchar_next (char const *buf, char const *end)
{
  bool const countall = false;
  return wordchars_count (buf, end, countall);
}
174
175/* In the buffer BUF, return nonzero if the character whose encoding
176 contains the byte before CUR is a word constituent. The buffer
177 ends at END. */
178size_t
179wordchar_prev (char const *buf, char const *cur, char const *end)
180{
181 if (buf == cur)
182 return 0;
183 unsigned char b = *--cur;
184 if (! localeinfo.multibyte
185 || (localeinfo.using_utf8 && localeinfo.sbclen[b] == 1))
186 return sbwordchar[b];
187 char const *p = buf;
188 cur -= mb_goback (&p, NULL, cur, end);
189 return wordchar_next (cur, end);
190}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette