Changeset 3613 in kBuild for trunk/src/sed/lib/regex_internal.h
- Timestamp:
- Sep 19, 2024 12:34:43 AM (7 months ago)
- Location:
- trunk/src/sed
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/sed
-
Property svn:mergeinfo
set to
/vendor/sed/current merged eligible
-
Property svn:mergeinfo
set to
-
trunk/src/sed/lib/regex_internal.h
r2660 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <[email protected]>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 20 #ifndef _REGEX_INTERNAL_H 22 21 #define _REGEX_INTERNAL_H 1 23 22 24 #include <assert.h>25 23 #include <ctype.h> 26 24 #include <stdio.h> … … 28 26 #include <string.h> 29 27 30 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC 31 # include <langinfo.h> 32 #endif 33 #if defined HAVE_LOCALE_H || defined _LIBC 34 # include <locale.h> 35 #endif 36 #if defined HAVE_WCHAR_H || defined _LIBC 37 # include <wchar.h> 38 #endif /* HAVE_WCHAR_H || _LIBC */ 39 #if defined HAVE_WCTYPE_H || defined _LIBC 40 # include <wctype.h> 41 #endif /* HAVE_WCTYPE_H || _LIBC */ 42 #if defined HAVE_STDBOOL_H || defined _LIBC || defined(__HAIKU__) /* haiku hack */ 43 # include <stdbool.h> 44 #endif /* HAVE_STDBOOL_H || _LIBC */ 45 #if defined _LIBC 46 # include <bits/libc-lock.h> 28 #include <langinfo.h> 29 #include <locale.h> 30 #include <wchar.h> 31 #include <wctype.h> 32 #include <stdint.h> 33 34 #ifndef _LIBC 35 # include <dynarray.h> 36 #endif 37 38 #include <intprops.h> 39 #include <verify.h> 40 41 #if defined DEBUG && DEBUG != 0 42 # include <assert.h> 43 # define DEBUG_ASSERT(x) assert (x) 47 44 #else 48 # define __libc_lock_define(CLASS,NAME) 49 # define __libc_lock_init(NAME) do { } while (0) 50 # define __libc_lock_lock(NAME) do { } while (0) 51 # define __libc_lock_unlock(NAME) do { } while (0) 45 # define DEBUG_ASSERT(x) assume (x) 46 #endif 47 48 #ifdef _LIBC 49 # include <libc-lock.h> 50 # define lock_define(name) __libc_lock_define (, name) 51 # define lock_init(lock) (__libc_lock_init (lock), 0) 52 # define lock_fini(lock) ((void) 0) 53 # define lock_lock(lock) __libc_lock_lock (lock) 54 # define lock_unlock(lock) __libc_lock_unlock (lock) 55 #elif defined GNULIB_LOCK && !defined GNULIB_REGEX_SINGLE_THREAD 56 # include "glthread/lock.h" 57 # define lock_define(name) gl_lock_define (, name) 58 # define lock_init(lock) glthread_lock_init (&(lock)) 59 # define lock_fini(lock) glthread_lock_destroy (&(lock)) 60 # define lock_lock(lock) glthread_lock_lock (&(lock)) 61 # define lock_unlock(lock) glthread_lock_unlock (&(lock)) 62 #elif defined GNULIB_PTHREAD && !defined GNULIB_REGEX_SINGLE_THREAD 63 # include <pthread.h> 64 # define lock_define(name) pthread_mutex_t name; 65 # define lock_init(lock) pthread_mutex_init (&(lock), 0) 66 # define lock_fini(lock) pthread_mutex_destroy (&(lock)) 67 # define lock_lock(lock) pthread_mutex_lock (&(lock)) 68 # define lock_unlock(lock) pthread_mutex_unlock (&(lock)) 69 #else 70 # define lock_define(name) 71 # define lock_init(lock) 0 72 # define lock_fini(lock) ((void) 0) 73 /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC. */ 74 # define lock_lock(lock) ((void) dfa) 75 # define lock_unlock(lock) ((void) 0) 52 76 #endif 53 77 54 78 /* In case that the system doesn't have isblank(). */ 55 #if !defined _LIBC && ! defined HAVE_ISBLANK && !defined isblank79 #if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK)) 56 80 # define isblank(ch) ((ch) == ' ' || (ch) == '\t') 81 #endif 82 83 /* regex code assumes isascii has its usual numeric meaning, 84 even if the portable character set uses EBCDIC encoding, 85 and even if wint_t is wider than int. */ 86 #ifndef _LIBC 87 # undef isascii 88 # define isascii(c) (((c) & ~0x7f) == 0) 57 89 #endif 58 90 … … 61 93 # define _RE_DEFINE_LOCALE_FUNCTIONS 1 62 94 # include <locale/localeinfo.h> 63 # include <locale/elem-hash.h>64 95 # include <locale/coll-lookup.h> 65 96 # endif … … 72 103 # undef gettext 73 104 # define gettext(msgid) \ 74 INTUSE(__dcgettext)(_libc_intl_domainname, msgid, LC_MESSAGES)105 __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) 75 106 # endif 76 107 #else 108 # undef gettext 77 109 # define gettext(msgid) (msgid) 78 110 #endif … … 84 116 #endif 85 117 86 #if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC 87 # if defined(__OS2__) /* setlocale() misbehaves in LIBC 0.6.1 and earlier, breaking /[a-z]/. */ 88 # if defined(__KLIBC_VERSION__) 89 # if __KLIBC_VERSION__ >= 0x00060002 90 # define RE_ENABLE_I18N 91 # endif 92 # endif 93 # else 94 # define RE_ENABLE_I18N 95 # endif 96 #endif 97 98 #if __GNUC__ >= 3 99 # define BE(expr, val) __builtin_expect (expr, val) 100 #else 101 # define BE(expr, val) (expr) 102 # ifndef inline /* bird: silly since the rest of sed depends on this working.. */ 103 # define inline 104 # endif 105 #endif 106 107 /* Number of single byte character. */ 108 #define SBC_MAX 256 118 /* Number of ASCII characters. */ 119 #define ASCII_CHARS 0x80 120 121 /* Number of single byte characters. */ 122 #define SBC_MAX (UCHAR_MAX + 1) 109 123 110 124 #define COLL_ELEM_LEN_MAX 8 … … 116 130 /* Rename to standard API for using out of glibc. */ 117 131 #ifndef _LIBC 132 # undef __wctype 133 # undef __iswalnum 134 # undef __iswctype 135 # undef __towlower 136 # undef __towupper 118 137 # define __wctype wctype 138 # define __iswalnum iswalnum 119 139 # define __iswctype iswctype 140 # define __towlower towlower 141 # define __towupper towupper 120 142 # define __btowc btowc 121 # ifndef __mempcpy /* keep quiet if string.h defines it (bird) */ 122 # define __mempcpy mempcpy 123 # endif 143 # define __mbrtowc mbrtowc 124 144 # define __wcrtomb wcrtomb 125 145 # define __regfree regfree 126 # define attribute_hidden127 146 #endif /* not _LIBC */ 128 147 129 #ifdef __GNUC__ 130 # define __attribute(arg) __attribute__ (arg) 148 /* Types related to integers. Unless protected by #ifdef _LIBC, the 149 regex code should avoid exact-width types like int32_t and uint64_t 150 as some non-GCC platforms lack them, an issue when this code is 151 used in Gnulib. */ 152 153 #ifndef SSIZE_MAX 154 # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) 155 #endif 156 #ifndef ULONG_WIDTH 157 # define ULONG_WIDTH REGEX_UINTEGER_WIDTH (ULONG_MAX) 158 /* The number of usable bits in an unsigned integer type with maximum 159 value MAX, as an int expression suitable in #if. Cover all known 160 practical hosts. This implementation exploits the fact that MAX is 161 1 less than a power of 2, and merely counts the number of 1 bits in 162 MAX; "COBn" means "count the number of 1 bits in the low-order n bits". */ 163 # define REGEX_UINTEGER_WIDTH(max) REGEX_COB128 (max) 164 # define REGEX_COB128(n) (REGEX_COB64 ((n) >> 31 >> 31 >> 2) + REGEX_COB64 (n)) 165 # define REGEX_COB64(n) (REGEX_COB32 ((n) >> 31 >> 1) + REGEX_COB32 (n)) 166 # define REGEX_COB32(n) (REGEX_COB16 ((n) >> 16) + REGEX_COB16 (n)) 167 # define REGEX_COB16(n) (REGEX_COB8 ((n) >> 8) + REGEX_COB8 (n)) 168 # define REGEX_COB8(n) (REGEX_COB4 ((n) >> 4) + REGEX_COB4 (n)) 169 # define REGEX_COB4(n) (!!((n) & 8) + !!((n) & 4) + !!((n) & 2) + ((n) & 1)) 170 # if ULONG_MAX / 2 + 1 != 1ul << (ULONG_WIDTH - 1) 171 # error "ULONG_MAX out of range" 172 # endif 173 #endif 174 175 /* The type of indexes into strings. This is signed, not size_t, 176 since the API requires indexes to fit in regoff_t anyway, and using 177 signed integers makes the code a bit smaller and presumably faster. 178 The traditional GNU regex implementation uses int for indexes. 179 The POSIX-compatible implementation uses a possibly-wider type. 180 The name 'Idx' is three letters to minimize the hassle of 181 reindenting a lot of regex code that formerly used 'int'. */ 182 typedef regoff_t Idx; 183 #ifdef _REGEX_LARGE_OFFSETS 184 # define IDX_MAX SSIZE_MAX 131 185 #else 132 # define __attribute(arg) 133 #endif 134 135 #ifndef SIZE_MAX 136 #define SIZE_MAX ((size_t)-1) 137 #endif 138 139 extern const char __re_error_msgid[] attribute_hidden; 140 extern const size_t __re_error_msgid_idx[] attribute_hidden; 186 # define IDX_MAX INT_MAX 187 #endif 188 189 /* A hash value, suitable for computing hash tables. */ 190 typedef __re_size_t re_hashval_t; 141 191 142 192 /* An integer used to represent a set of bits. It must be unsigned, … … 146 196 #define BITSET_WORD_MAX ULONG_MAX 147 197 /* Number of bits in a bitset_word_t. */ 148 #define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) 149 /* Number of bitset_word_t in a bit_set. */ 150 #define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) 198 #define BITSET_WORD_BITS ULONG_WIDTH 199 200 /* Number of bitset_word_t values in a bitset_t. */ 201 #define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) 202 151 203 typedef bitset_word_t bitset_t[BITSET_WORDS]; 152 204 typedef bitset_word_t *re_bitset_ptr_t; 153 205 typedef const bitset_word_t *re_const_bitset_ptr_t; 154 155 #define bitset_set(set,i) \156 (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)157 #define bitset_clear(set,i) \158 (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))159 #define bitset_contain(set,i) \160 (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))161 #define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))162 #define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))163 #define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))164 206 165 207 #define PREV_WORD_CONSTRAINT 0x0001 … … 190 232 typedef struct 191 233 { 192 intalloc;193 intnelem;194 int*elems;234 Idx alloc; 235 Idx nelem; 236 Idx *elems; 195 237 } re_node_set; 196 238 … … 205 247 OP_BACK_REF = 4, 206 248 OP_PERIOD = 5, 207 #ifdef RE_ENABLE_I18N208 249 COMPLEX_BRACKET = 6, 209 250 OP_UTF8_PERIOD = 7, 210 #endif /* RE_ENABLE_I18N */211 251 212 252 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used … … 246 286 } re_token_type_t; 247 287 248 #ifdef RE_ENABLE_I18N249 288 typedef struct 250 289 { … … 252 291 wchar_t *mbchars; 253 292 293 #ifdef _LIBC 254 294 /* Collating symbols. */ 255 # ifdef _LIBC256 295 int32_t *coll_syms; 257 # endif 258 296 #endif 297 298 #ifdef _LIBC 259 299 /* Equivalence classes. */ 260 # ifdef _LIBC261 300 int32_t *equiv_classes; 262 # 301 #endif 263 302 264 303 /* Range expressions. */ 265 # 304 #ifdef _LIBC 266 305 uint32_t *range_starts; 267 306 uint32_t *range_ends; 268 # else /* not _LIBC */307 #else 269 308 wchar_t *range_starts; 270 309 wchar_t *range_ends; 271 # endif /* not _LIBC */310 #endif 272 311 273 312 /* Character classes. */ … … 278 317 279 318 /* # of multibyte characters. */ 280 intnmbchars;319 Idx nmbchars; 281 320 282 321 /* # of collating symbols. */ 283 intncoll_syms;322 Idx ncoll_syms; 284 323 285 324 /* # of equivalence classes. */ 286 intnequiv_classes;325 Idx nequiv_classes; 287 326 288 327 /* # of range expressions. */ 289 intnranges;328 Idx nranges; 290 329 291 330 /* # of character classes. */ 292 intnchar_classes;331 Idx nchar_classes; 293 332 } re_charset_t; 294 #endif /* RE_ENABLE_I18N */295 333 296 334 typedef struct … … 300 338 unsigned char c; /* for CHARACTER */ 301 339 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ 302 #ifdef RE_ENABLE_I18N303 340 re_charset_t *mbcset; /* for COMPLEX_BRACKET */ 304 #endif /* RE_ENABLE_I18N */ 305 int idx; /* for BACK_REF */ 341 Idx idx; /* for BACK_REF */ 306 342 re_context_type ctx_type; /* for ANCHOR */ 307 343 } opr; 308 #if __GNUC__ >= 2344 #if (__GNUC__ >= 2 || defined __clang__) && !defined __STRICT_ANSI__ 309 345 re_token_type_t type : 8; 310 346 #else … … 314 350 unsigned int duplicated : 1; 315 351 unsigned int opt_subexp : 1; 316 #ifdef RE_ENABLE_I18N317 352 unsigned int accept_mb : 1; 318 353 /* These 2 bits can be moved into the union if needed (e.g. if running out 319 354 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ 320 355 unsigned int mb_partial : 1; 321 #endif322 356 unsigned int word_char : 1; 323 357 } re_token_t; … … 334 368 the same address that RAW_MBS points. */ 335 369 unsigned char *mbs; 336 #ifdef RE_ENABLE_I18N337 370 /* Store the wide character string which is corresponding to MBS. */ 338 371 wint_t *wcs; 339 int*offsets;372 Idx *offsets; 340 373 mbstate_t cur_state; 341 #endif342 374 /* Index in RAW_MBS. Each character mbs[i] corresponds to 343 375 raw_mbs[raw_mbs_idx + i]. */ 344 intraw_mbs_idx;376 Idx raw_mbs_idx; 345 377 /* The length of the valid characters in the buffers. */ 346 intvalid_len;378 Idx valid_len; 347 379 /* The corresponding number of bytes in raw_mbs array. */ 348 intvalid_raw_len;380 Idx valid_raw_len; 349 381 /* The length of the buffers MBS and WCS. */ 350 intbufs_len;382 Idx bufs_len; 351 383 /* The index in MBS, which is updated by re_string_fetch_byte. */ 352 intcur_idx;384 Idx cur_idx; 353 385 /* length of RAW_MBS array. */ 354 intraw_len;386 Idx raw_len; 355 387 /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ 356 intlen;388 Idx len; 357 389 /* End of the buffer may be shorter than its length in the cases such 358 390 as re_match_2, re_search_2. Then, we use STOP for end of the buffer 359 391 instead of LEN. */ 360 intraw_stop;392 Idx raw_stop; 361 393 /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ 362 intstop;394 Idx stop; 363 395 364 396 /* The context of mbs[0]. We store the context independently, since … … 370 402 /* Copy of re_dfa_t's word_char. */ 371 403 re_const_bitset_ptr_t word_char; 372 /* 1if REG_ICASE. */404 /* true if REG_ICASE. */ 373 405 unsigned char icase; 374 406 unsigned char is_utf8; … … 387 419 388 420 #ifndef _LIBC 389 # ifdef __i386__ 390 # ifdef __OS2__ 391 # define internal_function __attribute ((regparm (3))) 392 # else 393 # define internal_function __attribute ((regparm (3), stdcall)) 394 # endif 395 # else 396 # define internal_function 397 # endif 398 #endif 399 400 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 401 int new_buf_len) 402 internal_function; 403 #ifdef RE_ENABLE_I18N 404 static void build_wcs_buffer (re_string_t *pstr) internal_function; 405 static int build_wcs_upper_buffer (re_string_t *pstr) internal_function; 406 #endif /* RE_ENABLE_I18N */ 407 static void build_upper_buffer (re_string_t *pstr) internal_function; 408 static void re_string_translate_buffer (re_string_t *pstr) internal_function; 409 static unsigned int re_string_context_at (const re_string_t *input, int idx, 410 int eflags) 411 internal_function __attribute ((pure)); 421 # define IS_IN(libc) false 422 #endif 423 412 424 #define re_string_peek_byte(pstr, offset) \ 413 425 ((pstr)->mbs[(pstr)->cur_idx + offset]) … … 427 439 #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) 428 440 429 #if HAVE_ALLOCA_H 430 # include <alloca.h> 431 #elif HAVE_MALLOC_H 432 # include <malloc.h> 433 #endif 434 435 #ifndef _LIBC 436 # if HAVE_ALLOCA 437 /* The OS usually guarantees only one guard page at the bottom of the stack, 438 and a page size can be as small as 4096 bytes. So we cannot safely 439 allocate anything larger than 4096 bytes. Also care for the possibility 440 of a few compiler-allocated temporary stack slots. */ 441 # define __libc_use_alloca(n) ((n) < 4032) 442 # else 443 /* alloca is implemented with malloc, so just use malloc. */ 444 # define __libc_use_alloca(n) 0 445 # endif 441 #ifdef _LIBC 442 # define MALLOC_0_IS_NONNULL 1 443 #elif !defined MALLOC_0_IS_NONNULL 444 # define MALLOC_0_IS_NONNULL 0 445 #endif 446 447 #ifndef MAX 448 # define MAX(a,b) ((a) < (b) ? (b) : (a)) 449 #endif 450 #ifndef MIN 451 # define MIN(a,b) ((a) < (b) ? (a) : (b)) 446 452 #endif 447 453 … … 460 466 re_token_t token; 461 467 462 /* `node_idx' is the index in dfa->nodes, if `type' == 0.463 Otherwise `type' indicate the type of this node. */464 intnode_idx;468 /* 'node_idx' is the index in dfa->nodes, if 'type' == 0. 469 Otherwise 'type' indicate the type of this node. */ 470 Idx node_idx; 465 471 }; 466 472 typedef struct bin_tree_t bin_tree_t; … … 489 495 #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') 490 496 #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) 491 #define IS_WIDE_WORD_CHAR(ch) ( iswalnum (ch) || (ch) == L'_')497 #define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_') 492 498 #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) 493 499 … … 506 512 struct re_dfastate_t 507 513 { 508 unsigned int hash;514 re_hashval_t hash; 509 515 re_node_set nodes; 510 516 re_node_set non_eps_nodes; … … 514 520 unsigned int context : 4; 515 521 unsigned int halt : 1; 516 /* If this state can accept `multi byte'.522 /* If this state can accept "multi byte". 517 523 Note that we refer to multibyte characters, and multi character 518 collating elements as `multi byte'. */524 collating elements as "multi byte". */ 519 525 unsigned int accept_mb : 1; 520 526 /* If this state has backreference node(s). */ … … 526 532 struct re_state_table_entry 527 533 { 528 intnum;529 intalloc;534 Idx num; 535 Idx alloc; 530 536 re_dfastate_t **array; 531 537 }; … … 535 541 typedef struct 536 542 { 537 intnext_idx;538 intalloc;543 Idx next_idx; 544 Idx alloc; 539 545 re_dfastate_t **array; 540 546 } state_array_t; … … 544 550 typedef struct 545 551 { 546 intnode;547 intstr_idx; /* The position NODE match at. */552 Idx node; 553 Idx str_idx; /* The position NODE match at. */ 548 554 state_array_t path; 549 555 } re_sub_match_last_t; … … 555 561 typedef struct 556 562 { 557 intstr_idx;558 intnode;563 Idx str_idx; 564 Idx node; 559 565 state_array_t *path; 560 intalasts; /* Allocation size of LASTS. */561 intnlasts; /* The number of LASTS. */566 Idx alasts; /* Allocation size of LASTS. */ 567 Idx nlasts; /* The number of LASTS. */ 562 568 re_sub_match_last_t **lasts; 563 569 } re_sub_match_top_t; … … 565 571 struct re_backref_cache_entry 566 572 { 567 int node; 568 int str_idx; 569 int subexp_from; 570 int subexp_to; 573 Idx node; 574 Idx str_idx; 575 Idx subexp_from; 576 Idx subexp_to; 577 bitset_word_t eps_reachable_subexps_map; 571 578 char more; 572 char unused;573 unsigned short int eps_reachable_subexps_map;574 579 }; 575 580 … … 578 583 /* The string object corresponding to the input string. */ 579 584 re_string_t input; 580 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)581 585 const re_dfa_t *const dfa; 582 #else583 const re_dfa_t *dfa;584 #endif585 586 /* EFLAGS of the argument of regexec. */ 586 587 int eflags; 587 588 /* Where the matching ends. */ 588 intmatch_last;589 intlast_node;589 Idx match_last; 590 Idx last_node; 590 591 /* The state log used by the matcher. */ 591 592 re_dfastate_t **state_log; 592 intstate_log_top;593 Idx state_log_top; 593 594 /* Back reference cache. */ 594 intnbkref_ents;595 intabkref_ents;595 Idx nbkref_ents; 596 Idx abkref_ents; 596 597 struct re_backref_cache_entry *bkref_ents; 597 598 int max_mb_elem_len; 598 intnsub_tops;599 intasub_tops;599 Idx nsub_tops; 600 Idx asub_tops; 600 601 re_sub_match_top_t **sub_tops; 601 602 } re_match_context_t; … … 605 606 re_dfastate_t **sifted_states; 606 607 re_dfastate_t **limited_states; 607 intlast_node;608 intlast_str_idx;608 Idx last_node; 609 Idx last_str_idx; 609 610 re_node_set limits; 610 611 } re_sift_context_t; … … 612 613 struct re_fail_stack_ent_t 613 614 { 614 intidx;615 intnode;615 Idx idx; 616 Idx node; 616 617 regmatch_t *regs; 617 618 re_node_set eps_via_nodes; … … 620 621 struct re_fail_stack_t 621 622 { 622 intnum;623 intalloc;623 Idx num; 624 Idx alloc; 624 625 struct re_fail_stack_ent_t *stack; 625 626 }; … … 630 631 size_t nodes_alloc; 631 632 size_t nodes_len; 632 int*nexts;633 int*org_indices;633 Idx *nexts; 634 Idx *org_indices; 634 635 re_node_set *edests; 635 636 re_node_set *eclosures; … … 645 646 int str_tree_storage_idx; 646 647 647 /* number of subexpressions `re_nsub' is in regex_t. */648 unsigned int state_hash_mask;649 intinit_node;650 intnbackref; /* The number of backreference in this dfa. */648 /* number of subexpressions 're_nsub' is in regex_t. */ 649 re_hashval_t state_hash_mask; 650 Idx init_node; 651 Idx nbackref; /* The number of backreference in this dfa. */ 651 652 652 653 /* Bitmap expressing which backreference is used. */ … … 665 666 bitset_t word_char; 666 667 reg_syntax_t syntax; 667 int*subexp_map;668 Idx *subexp_map; 668 669 #ifdef DEBUG 669 670 char* re_str; 670 671 #endif 671 __libc_lock_define (,lock)672 lock_define (lock) 672 673 }; 673 674 … … 701 702 702 703 703 /* Inline functions for bitset operation. */ 704 /* Functions for bitset_t operation. */ 705 706 static inline void 707 bitset_set (bitset_t set, Idx i) 708 { 709 set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; 710 } 711 712 static inline void 713 bitset_clear (bitset_t set, Idx i) 714 { 715 set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); 716 } 717 718 static inline bool 719 bitset_contain (const bitset_t set, Idx i) 720 { 721 return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; 722 } 723 724 static inline void 725 bitset_empty (bitset_t set) 726 { 727 memset (set, '\0', sizeof (bitset_t)); 728 } 729 730 static inline void 731 bitset_set_all (bitset_t set) 732 { 733 memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); 734 if (SBC_MAX % BITSET_WORD_BITS != 0) 735 set[BITSET_WORDS - 1] = 736 ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; 737 } 738 739 static inline void 740 bitset_copy (bitset_t dest, const bitset_t src) 741 { 742 memcpy (dest, src, sizeof (bitset_t)); 743 } 744 704 745 static inline void 705 746 bitset_not (bitset_t set) 706 747 { 707 748 int bitset_i; 708 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)749 for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) 709 750 set[bitset_i] = ~set[bitset_i]; 751 if (SBC_MAX % BITSET_WORD_BITS != 0) 752 set[BITSET_WORDS - 1] = 753 ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) 754 & ~set[BITSET_WORDS - 1]); 710 755 } 711 756 … … 726 771 } 727 772 728 #ifdef RE_ENABLE_I18N 729 /* Inline functions for re_string. */ 730 static inline int 731 internal_function __attribute ((pure)) 732 re_string_char_size_at (const re_string_t *pstr, int idx) 773 /* Functions for re_string. */ 774 static int 775 __attribute__ ((pure, unused)) 776 re_string_char_size_at (const re_string_t *pstr, Idx idx) 733 777 { 734 778 int byte_idx; … … 741 785 } 742 786 743 static inlinewint_t744 internal_function __attribute ((pure))745 re_string_wchar_at (const re_string_t *pstr, intidx)787 static wint_t 788 __attribute__ ((pure, unused)) 789 re_string_wchar_at (const re_string_t *pstr, Idx idx) 746 790 { 747 791 if (pstr->mb_cur_max == 1) … … 750 794 } 751 795 796 #ifdef _LIBC 797 # include <locale/weight.h> 798 #endif 799 752 800 static int 753 internal_function __attribute ((pure))754 re_string_elem_size_at (const re_string_t *pstr, intidx)755 { 756 # 801 __attribute__ ((pure, unused)) 802 re_string_elem_size_at (const re_string_t *pstr, Idx idx) 803 { 804 #ifdef _LIBC 757 805 const unsigned char *p, *extra; 758 806 const int32_t *table, *indirect; 759 int32_t tmp;760 # include <locale/weight.h>761 807 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 762 808 … … 769 815 _NL_COLLATE_INDIRECTMB); 770 816 p = pstr->mbs + idx; 771 tmp = findidx (&p);817 findidx (table, indirect, extra, &p, pstr->len - idx); 772 818 return p - pstr->mbs - idx; 773 819 } 774 else 775 # endif /* _LIBC */ 776 return 1; 777 } 778 #endif /* RE_ENABLE_I18N */ 820 #endif /* _LIBC */ 821 822 return 1; 823 } 824 825 #ifdef _LIBC 826 # if __GNUC__ >= 7 827 # define FALLTHROUGH __attribute__ ((__fallthrough__)) 828 # else 829 # define FALLTHROUGH ((void) 0) 830 # endif 831 #else 832 # include "attribute.h" 833 #endif 779 834 780 835 #endif /* _REGEX_INTERNAL_H */
Note:
See TracChangeset
for help on using the changeset viewer.