VirtualBox

source: kBuild/trunk/src/grep/lib/exclude.c

Last change on this file was 3529, checked in by bird, 3 years ago

Imported grep 3.7 from grep-3.7.tar.gz (sha256: c22b0cf2d4f6bbe599c902387e8058990e1eee99aef333a203829e5fd3dbb342), applying minimal auto-props.

  • Property svn:eol-style set to native
File size: 17.1 KB
Line 
1/* exclude.c -- exclude file names
2
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2021 Free Software
4 Foundation, Inc.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19/* Written by Paul Eggert <[email protected]>
20 and Sergey Poznyakoff <[email protected]>.
21 Thanks to Phil Proudman <[email protected]>
22 for improvement suggestions. */
23
24#include <config.h>
25
26#include <stdbool.h>
27
28#include <ctype.h>
29#include <errno.h>
30#include <stddef.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <wctype.h>
35#include <regex.h>
36
37#include "exclude.h"
38#include "hash.h"
39#include "mbuiter.h"
40#include "fnmatch.h"
41#include "xalloc.h"
42#include "verify.h"
43#include "filename.h"
44
45#if GNULIB_EXCLUDE_SINGLE_THREAD
46# include "unlocked-io.h"
47#endif
48
49/* Non-GNU systems lack these options, so we don't need to check them. */
50#ifndef FNM_CASEFOLD
51# define FNM_CASEFOLD 0
52#endif
53#ifndef FNM_EXTMATCH
54# define FNM_EXTMATCH 0
55#endif
56#ifndef FNM_LEADING_DIR
57# define FNM_LEADING_DIR 0
58#endif
59
60verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
61 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
62 | FNM_CASEFOLD | FNM_EXTMATCH))
63 == 0);
64
65
66/* Exclusion patterns are grouped into a singly-linked list of
67 "exclusion segments". Each segment represents a set of patterns
68 that can be matches using the same algorithm. Non-wildcard
69 patterns are kept in hash tables, to speed up searches. Wildcard
70 patterns are stored as arrays of patterns. */
71
72
73/* An exclude pattern-options pair. The options are fnmatch options
74 ORed with EXCLUDE_* options. */
75
76struct patopts
77 {
78 int options;
79 union
80 {
81 char const *pattern;
82 regex_t re;
83 } v;
84 };
85
86/* An array of pattern-options pairs. */
87
88struct exclude_pattern
89 {
90 struct patopts *exclude;
91 idx_t exclude_alloc;
92 idx_t exclude_count;
93 };
94
95enum exclude_type
96 {
97 exclude_hash, /* a hash table of excluded names */
98 exclude_pattern /* an array of exclude patterns */
99 };
100
101struct exclude_segment
102 {
103 struct exclude_segment *next; /* next segment in list */
104 enum exclude_type type; /* type of this segment */
105 int options; /* common options for this segment */
106 union
107 {
108 Hash_table *table; /* for type == exclude_hash */
109 struct exclude_pattern pat; /* for type == exclude_pattern */
110 } v;
111 };
112
113struct pattern_buffer
114 {
115 struct pattern_buffer *next;
116 char *base;
117 };
118
119/* The exclude structure keeps a singly-linked list of exclude segments,
120 maintained in reverse order. */
121struct exclude
122 {
123 struct exclude_segment *head;
124 struct pattern_buffer *patbuf;
125 };
126
127/* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
128 add_exclude_file and add_exclude_fp below) can use this function
129 if it modifies the pattern, to ensure the allocated memory will be
130 properly reclaimed upon calling free_exclude. */
131void
132exclude_add_pattern_buffer (struct exclude *ex, char *buf)
133{
134 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
135 pbuf->base = buf;
136 pbuf->next = ex->patbuf;
137 ex->patbuf = pbuf;
138}
139
140/* Return true if STR has or may have wildcards, when matched with OPTIONS.
141 Return false if STR definitely does not have wildcards. */
142bool
143fnmatch_pattern_has_wildcards (const char *str, int options)
144{
145 while (1)
146 {
147 switch (*str++)
148 {
149 case '.':
150 case '{':
151 case '}':
152 case '(':
153 case ')':
154 if (options & EXCLUDE_REGEX)
155 return true;
156 break;
157
158 case '\\':
159 if (options & EXCLUDE_REGEX)
160 continue;
161 else
162 str += ! (options & FNM_NOESCAPE) && *str;
163 break;
164
165 case '+': case '@': case '!':
166 if (options & FNM_EXTMATCH && *str == '(')
167 return true;
168 break;
169
170 case '?': case '*': case '[':
171 return true;
172
173 case '\0':
174 return false;
175 }
176 }
177}
178
179static void
180unescape_pattern (char *str)
181{
182 char const *q = str;
183 do
184 q += *q == '\\' && q[1];
185 while ((*str++ = *q++));
186}
187
188/* Return a newly allocated and empty exclude list. */
189
190struct exclude *
191new_exclude (void)
192{
193 return xzalloc (sizeof *new_exclude ());
194}
195
196/* Calculate the hash of string. */
197static size_t
198string_hasher (void const *data, size_t n_buckets)
199{
200 char const *p = data;
201 return hash_string (p, n_buckets);
202}
203
204/* Ditto, for case-insensitive hashes */
205static size_t
206string_hasher_ci (void const *data, size_t n_buckets)
207{
208 char const *p = data;
209 mbui_iterator_t iter;
210 size_t value = 0;
211
212 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
213 {
214 mbchar_t m = mbui_cur (iter);
215 wchar_t wc;
216
217 if (m.wc_valid)
218 wc = towlower (m.wc);
219 else
220 wc = *m.ptr;
221
222 value = value * 31 + wc;
223 }
224
225 return value % n_buckets;
226}
227
228/* compare two strings for equality */
229static bool
230string_compare (void const *data1, void const *data2)
231{
232 char const *p1 = data1;
233 char const *p2 = data2;
234 return strcmp (p1, p2) == 0;
235}
236
237/* compare two strings for equality, case-insensitive */
238static bool
239string_compare_ci (void const *data1, void const *data2)
240{
241 char const *p1 = data1;
242 char const *p2 = data2;
243 return mbscasecmp (p1, p2) == 0;
244}
245
246static void
247string_free (void *data)
248{
249 free (data);
250}
251
252/* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
254static void
255new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
256{
257 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
258 sp->type = type;
259 sp->options = options;
260 switch (type)
261 {
262 case exclude_pattern:
263 break;
264
265 case exclude_hash:
266 sp->v.table = hash_initialize (0, NULL,
267 (options & FNM_CASEFOLD) ?
268 string_hasher_ci
269 : string_hasher,
270 (options & FNM_CASEFOLD) ?
271 string_compare_ci
272 : string_compare,
273 string_free);
274 break;
275 }
276 sp->next = ex->head;
277 ex->head = sp;
278}
279
280/* Free a single exclude segment */
281static void
282free_exclude_segment (struct exclude_segment *seg)
283{
284 switch (seg->type)
285 {
286 case exclude_pattern:
287 for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
288 {
289 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
290 regfree (&seg->v.pat.exclude[i].v.re);
291 }
292 free (seg->v.pat.exclude);
293 break;
294
295 case exclude_hash:
296 hash_free (seg->v.table);
297 break;
298 }
299 free (seg);
300}
301
302/* Free the storage associated with an exclude list. */
303void
304free_exclude (struct exclude *ex)
305{
306 struct exclude_segment *seg;
307 struct pattern_buffer *pbuf;
308
309 for (seg = ex->head; seg; )
310 {
311 struct exclude_segment *next = seg->next;
312 free_exclude_segment (seg);
313 seg = next;
314 }
315
316 for (pbuf = ex->patbuf; pbuf; )
317 {
318 struct pattern_buffer *next = pbuf->next;
319 free (pbuf->base);
320 free (pbuf);
321 pbuf = next;
322 }
323
324 free (ex);
325}
326
327/* Return zero if PATTERN matches F, obeying OPTIONS, except that
328 (unlike fnmatch) wildcards are disabled in PATTERN. */
329
330static int
331fnmatch_no_wildcards (char const *pattern, char const *f, int options)
332{
333 if (! (options & FNM_LEADING_DIR))
334 return ((options & FNM_CASEFOLD)
335 ? mbscasecmp (pattern, f)
336 : strcmp (pattern, f));
337 else if (! (options & FNM_CASEFOLD))
338 {
339 size_t patlen = strlen (pattern);
340 int r = strncmp (pattern, f, patlen);
341 if (! r)
342 {
343 r = f[patlen];
344 if (r == '/')
345 r = 0;
346 }
347 return r;
348 }
349 else
350 {
351 /* Walk through a copy of F, seeing whether P matches any prefix
352 of F.
353
354 FIXME: This is an O(N**2) algorithm; it should be O(N).
355 Also, the copy should not be necessary. However, fixing this
356 will probably involve a change to the mbs* API. */
357
358 char *fcopy = xstrdup (f);
359 char *p;
360 int r;
361 for (p = fcopy; ; *p++ = '/')
362 {
363 p = strchr (p, '/');
364 if (p)
365 *p = '\0';
366 r = mbscasecmp (pattern, fcopy);
367 if (!p || r <= 0)
368 break;
369 }
370 free (fcopy);
371 return r;
372 }
373}
374
375bool
376exclude_fnmatch (char const *pattern, char const *f, int options)
377{
378 int (*matcher) (char const *, char const *, int) =
379 (options & EXCLUDE_WILDCARDS
380 ? fnmatch
381 : fnmatch_no_wildcards);
382 bool matched = ((*matcher) (pattern, f, options) == 0);
383 char const *p;
384
385 if (! (options & EXCLUDE_ANCHORED))
386 for (p = f; *p && ! matched; p++)
387 if (*p == '/' && p[1] != '/')
388 matched = ((*matcher) (pattern, p + 1, options) == 0);
389
390 return matched;
391}
392
393static bool
394exclude_patopts (struct patopts const *opts, char const *f)
395{
396 int options = opts->options;
397
398 return (options & EXCLUDE_REGEX)
399 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
400 : exclude_fnmatch (opts->v.pattern, f, options);
401}
402
403/* Return true if the exclude_pattern segment SEG matches F. */
404
405static bool
406file_pattern_matches (struct exclude_segment const *seg, char const *f)
407{
408 idx_t exclude_count = seg->v.pat.exclude_count;
409 struct patopts const *exclude = seg->v.pat.exclude;
410
411 for (idx_t i = 0; i < exclude_count; i++)
412 {
413 if (exclude_patopts (exclude + i, f))
414 return true;
415 }
416 return false;
417}
418
419/* Return true if the exclude_hash segment SEG matches F.
420 BUFFER is an auxiliary storage of the same length as F (with nul
421 terminator included) */
422static bool
423file_name_matches (struct exclude_segment const *seg, char const *f,
424 char *buffer)
425{
426 int options = seg->options;
427 Hash_table *table = seg->v.table;
428
429 do
430 {
431 /* initialize the pattern */
432 strcpy (buffer, f);
433
434 while (1)
435 {
436 if (hash_lookup (table, buffer))
437 return true;
438 if (options & FNM_LEADING_DIR)
439 {
440 char *p = strrchr (buffer, '/');
441 if (p)
442 {
443 *p = 0;
444 continue;
445 }
446 }
447 break;
448 }
449
450 if (!(options & EXCLUDE_ANCHORED))
451 {
452 f = strchr (f, '/');
453 if (f)
454 f++;
455 }
456 else
457 break;
458 }
459 while (f);
460
461 return false;
462}
463
464/* Return true if EX excludes F. */
465
466bool
467excluded_file_name (struct exclude const *ex, char const *f)
468{
469 struct exclude_segment *seg;
470 bool invert = false;
471 char *filename = NULL;
472
473 /* If no patterns are given, the default is to include. */
474 if (!ex->head)
475 return false;
476
477 /* Scan through the segments, reporting the status of the first match.
478 The segments are in reverse order, so this reports the status of
479 the last match in the original option list. */
480 for (seg = ex->head; ; seg = seg->next)
481 {
482 if (seg->type == exclude_hash)
483 {
484 if (!filename)
485 filename = xmalloc (strlen (f) + 1);
486 if (file_name_matches (seg, f, filename))
487 break;
488 }
489 else
490 {
491 if (file_pattern_matches (seg, f))
492 break;
493 }
494
495 if (! seg->next)
496 {
497 /* If patterns are given but none match, the default is the
498 opposite of the last segment (i.e., the first in the
499 original option list). For example, in the command
500 'grep -r --exclude="a*" --include="*b" pat dir', the
501 first option is --exclude so any file name matching
502 neither a* nor *b is included. */
503 invert = true;
504 break;
505 }
506 }
507
508 free (filename);
509 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
510}
511
512/* Append to EX the exclusion PATTERN with OPTIONS. */
513
514void
515add_exclude (struct exclude *ex, char const *pattern, int options)
516{
517 struct exclude_segment *seg;
518 struct exclude_pattern *pat;
519 struct patopts *patopts;
520
521 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
522 && fnmatch_pattern_has_wildcards (pattern, options))
523 {
524 if (! (ex->head && ex->head->type == exclude_pattern
525 && ((ex->head->options & EXCLUDE_INCLUDE)
526 == (options & EXCLUDE_INCLUDE))))
527 new_exclude_segment (ex, exclude_pattern, options);
528
529 seg = ex->head;
530
531 pat = &seg->v.pat;
532 if (pat->exclude_count == pat->exclude_alloc)
533 pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
534 sizeof *pat->exclude);
535 patopts = &pat->exclude[pat->exclude_count++];
536
537 patopts->options = options;
538 if (options & EXCLUDE_REGEX)
539 {
540 int rc;
541 int cflags = REG_NOSUB|REG_EXTENDED|
542 ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
543
544 if (options & FNM_LEADING_DIR)
545 {
546 char *tmp;
547 idx_t len = strlen (pattern);
548
549 while (len > 0 && ISSLASH (pattern[len-1]))
550 --len;
551
552 if (len == 0)
553 rc = 1;
554 else
555 {
556 tmp = ximalloc (len + 7);
557 memcpy (tmp, pattern, len);
558 strcpy (tmp + len, "(/.*)?");
559 rc = regcomp (&patopts->v.re, tmp, cflags);
560 free (tmp);
561 }
562 }
563 else
564 rc = regcomp (&patopts->v.re, pattern, cflags);
565
566 if (rc)
567 {
568 pat->exclude_count--;
569 return;
570 }
571 }
572 else
573 {
574 if (options & EXCLUDE_ALLOC)
575 {
576 pattern = xstrdup (pattern);
577 exclude_add_pattern_buffer (ex, (char*) pattern);
578 }
579 patopts->v.pattern = pattern;
580 }
581 }
582 else
583 {
584 char *str, *p;
585 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
586 | FNM_LEADING_DIR | FNM_CASEFOLD);
587 if (! (ex->head && ex->head->type == exclude_hash
588 && ((ex->head->options & exclude_hash_flags)
589 == (options & exclude_hash_flags))))
590 new_exclude_segment (ex, exclude_hash, options);
591 seg = ex->head;
592
593 str = xstrdup (pattern);
594 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
595 unescape_pattern (str);
596 p = hash_insert (seg->v.table, str);
597 if (p != str)
598 free (str);
599 }
600}
601
602/* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
603 OPTIONS. LINE_END terminates each pattern in the file. If
604 LINE_END is a space character, ignore trailing spaces and empty
605 lines in FP. Return -1 on failure, 0 on success. */
606
607int
608add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
609 struct exclude *ex, FILE *fp, int options,
610 char line_end,
611 void *data)
612{
613 char *buf = NULL;
614 char *p;
615 char *pattern;
616 char const *lim;
617 idx_t buf_alloc = 0;
618 idx_t buf_count = 0;
619 int c;
620 int e = 0;
621
622 while ((c = getc (fp)) != EOF)
623 {
624 if (buf_count == buf_alloc)
625 buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
626 buf[buf_count++] = c;
627 }
628
629 if (ferror (fp))
630 e = errno;
631
632 buf = xirealloc (buf, buf_count + 1);
633 buf[buf_count] = line_end;
634 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
635
636 exclude_add_pattern_buffer (ex, buf);
637
638 pattern = buf;
639
640 for (p = buf; p < lim; p++)
641 if (*p == line_end)
642 {
643 char *pattern_end = p;
644
645 if (isspace ((unsigned char) line_end))
646 {
647 for (; ; pattern_end--)
648 if (pattern_end == pattern)
649 goto next_pattern;
650 else if (! isspace ((unsigned char) pattern_end[-1]))
651 break;
652 }
653
654 *pattern_end = '\0';
655 (*add_func) (ex, pattern, options, data);
656
657 next_pattern:
658 pattern = p + 1;
659 }
660
661 errno = e;
662 return e ? -1 : 0;
663}
664
665static void
666call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
667{
668 void (**addfnptr) (struct exclude *, char const *, int) = data;
669 (*addfnptr) (ex, pattern, options);
670}
671
672int
673add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
674 struct exclude *ex, char const *file_name, int options,
675 char line_end)
676{
677 bool use_stdin = file_name[0] == '-' && !file_name[1];
678 FILE *in;
679 int rc = 0;
680
681 if (use_stdin)
682 in = stdin;
683 else if (! (in = fopen (file_name, "re")))
684 return -1;
685
686 rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
687
688 if (!use_stdin && fclose (in) != 0)
689 rc = -1;
690
691 return rc;
692}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette