VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 2912

Last change on this file since 2912 was 2799, checked in by bird, 10 years ago

Fixed glob.h inclusion issue causing stack corruption. Fixed alignment issue in the string expansion compiler. More makefile eval 'compiler' work.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.5 KB
Line 
1/* $Id: strcache2.c 2799 2015-09-19 20:36:31Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008-2010 knut st. osmundsen <[email protected]>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild. If not, see <http://www.gnu.org/licenses/>
23 *
24 */
25
26/*******************************************************************************
27* Header Files *
28*******************************************************************************/
29#include "make.h"
30#include "strcache2.h"
31
32#include <assert.h>
33
34#include "debug.h"
35
36#ifdef _MSC_VER
37typedef unsigned char uint8_t;
38typedef unsigned short uint16_t;
39typedef unsigned int uint32_t;
40typedef signed char int8_t;
41typedef signed short int16_t;
42typedef signed int int32_t;
43#else
44# include <stdint.h>
45#endif
46
47#ifdef WINDOWS32
48# include <io.h>
49# include <process.h>
50# include <Windows.h>
51# define PARSE_IN_WORKER
52#endif
53
54#ifdef __OS2__
55# include <sys/fmutex.h>
56#endif
57
58#ifdef HAVE_PTHREAD
59# include <pthread.h>
60#endif
61
62
63/*******************************************************************************
64* Defined Constants And Macros *
65*******************************************************************************/
66/* The default size of a memory segment (1MB). */
67#define STRCACHE2_SEG_SIZE (1024U*1024U)
68/* The default hash table shift (hash size give as a power of two). */
69#define STRCACHE2_HASH_SHIFT 16
70/** Does the modding / masking of a hash number into an index. */
71#ifdef STRCACHE2_USE_MASK
72# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73#else
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75#endif
76
77# if ( defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
78 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)) \
79 && !defined(GCC_ADDRESS_SANITIZER)
80# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
81# else
82 /* (endian doesn't matter) */
83# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
84 | (((const uint8_t *)(ptr))[1]) )
85# endif
86
87
88/*******************************************************************************
89* Global Variables *
90*******************************************************************************/
91/* List of initialized string caches. */
92static struct strcache2 *strcache_head;
93
94
95/** Finds the closest primary number for power of two value (or something else
96 * useful if not support). */
97MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
98{
99 switch (shift)
100 {
101 case 5: return 31;
102 case 6: return 61;
103 case 7: return 127;
104 case 8: return 251;
105 case 9: return 509;
106 case 10: return 1021;
107 case 11: return 2039;
108 case 12: return 4093;
109 case 13: return 8191;
110 case 14: return 16381;
111 case 15: return 32749;
112 case 16: return 65521;
113 case 17: return 131063;
114 case 18: return 262139;
115 case 19: return 524269;
116 case 20: return 1048573;
117 case 21: return 2097143;
118 case 22: return 4194301;
119 case 23: return 8388593;
120
121 default:
122 assert (0);
123 return (1 << shift) - 1;
124 }
125}
126
127/* The following is a bit experiment. It produces longer chains, i.e. worse
128 distribution of the strings in the table, however the actual make
129 performances is better (<time). The explanation is probably that the
130 collisions only really increase for entries that aren't looked up that
131 much and that it actually improoves the situation for those that is. Or
132 that we spend so much less time hashing that it makes up (and more) for
133 the pentalty we suffer from the longer chains and worse distribution.
134
135 XXX: Check how this works out with different path lengths. I suspect it
136 might depend on the length of PATH_ROOT and the depth of the files
137 in the project as well. If it does, this might make matters worse
138 for some and better for others which isn't very cool... */
139
140#if 0
141# define BIG_HASH_SIZE 32 /* kinda fast */
142# define BIG_HASH_HEAD 16
143# define BIG_HASH_TAIL 12
144#elif 0
145# define BIG_HASH_SIZE 68 /* kinda safe */
146# define BIG_HASH_HEAD 24
147# define BIG_HASH_TAIL 24
148#elif 0
149# define BIG_HASH_SIZE 128 /* safe */
150# define BIG_HASH_HEAD 32
151# define BIG_HASH_TAIL 32
152#endif
153
154#ifdef BIG_HASH_SIZE
155/* long string: hash head and tail, drop the middle. */
156MY_INLINE unsigned int
157strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
158{
159 uint32_t hash = len;
160 uint32_t tmp;
161 unsigned int head;
162
163 /* head BIG_HASH_HEAD bytes */
164 head = (BIG_HASH_HEAD >> 2);
165 while (head-- > 0)
166 {
167 hash += strcache2_get_unaligned_16bits (str);
168 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
169 hash = (hash << 16) ^ tmp;
170 str += 2 * sizeof (uint16_t);
171 hash += hash >> 11;
172 }
173
174 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
175 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
176 head = (BIG_HASH_TAIL >> 2);
177 while (head-- > 0)
178 {
179 hash += strcache2_get_unaligned_16bits (str);
180 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
181 hash = (hash << 16) ^ tmp;
182 str += 2 * sizeof (uint16_t);
183 hash += hash >> 11;
184 }
185
186 /* force "avalanching" of final 127 bits. */
187 hash ^= hash << 3;
188 hash += hash >> 5;
189 hash ^= hash << 4;
190 hash += hash >> 17;
191 hash ^= hash << 25;
192 hash += hash >> 6;
193
194 return hash;
195}
196#endif /* BIG_HASH_SIZE */
197
198MY_INLINE unsigned int
199strcache2_case_sensitive_hash (const char *str, unsigned int len)
200{
201#if 1
202 /* Paul Hsieh hash SuperFast function:
203 http://www.azillionmonkeys.com/qed/hash.html
204
205 This performs very good and as a sligtly better distribution than
206 STRING_N_HASH_1 on a typical kBuild run.
207
208 It is also 37% faster than return_STRING_N_HASH_1 when running the
209 two 100 times over typical kBuild strings that end up here (did a
210 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
211 with -O2.
212
213 FIXME: A path for well aligned data should be added to speed up
214 execution on alignment sensitive systems. */
215 unsigned int rem;
216 uint32_t hash;
217 uint32_t tmp;
218
219 assert (sizeof (uint8_t) == sizeof (char));
220
221# ifdef BIG_HASH_SIZE
222 /* long string? */
223# if 0 /*BIG_HASH_SIZE > 128*/
224 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
225# else
226 if (len >= BIG_HASH_SIZE)
227# endif
228 return strcache2_case_sensitive_hash_big (str, len);
229# endif
230
231 /* short string: main loop, walking on 2 x uint16_t */
232 hash = len;
233 rem = len & 3;
234 len >>= 2;
235 while (len > 0)
236 {
237 hash += strcache2_get_unaligned_16bits (str);
238 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
239 hash = (hash << 16) ^ tmp;
240 str += 2 * sizeof (uint16_t);
241 hash += hash >> 11;
242 len--;
243 }
244
245 /* the remainder */
246 switch (rem)
247 {
248 case 3:
249 hash += strcache2_get_unaligned_16bits (str);
250 hash ^= hash << 16;
251 hash ^= str[sizeof (uint16_t)] << 18;
252 hash += hash >> 11;
253 break;
254 case 2:
255 hash += strcache2_get_unaligned_16bits (str);
256 hash ^= hash << 11;
257 hash += hash >> 17;
258 break;
259 case 1:
260 hash += *str;
261 hash ^= hash << 10;
262 hash += hash >> 1;
263 break;
264 }
265
266 /* force "avalanching" of final 127 bits. */
267 hash ^= hash << 3;
268 hash += hash >> 5;
269 hash ^= hash << 4;
270 hash += hash >> 17;
271 hash ^= hash << 25;
272 hash += hash >> 6;
273
274 return hash;
275
276#elif 1
277 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
278 when running the two 100 times over typical kBuild strings that
279 end up here (did a fprintf here and built kBuild).
280 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
281
282 unsigned int hash = 0;
283 if (MY_PREDICT_TRUE(len >= 2))
284 {
285 unsigned int ch0 = *str++;
286 hash = 0;
287 len--;
288 while (len >= 2)
289 {
290 unsigned int ch1 = *str++;
291 hash += ch0 << (ch1 & 0xf);
292
293 ch0 = *str++;
294 hash += ch1 << (ch0 & 0xf);
295
296 len -= 2;
297 }
298 if (len == 1)
299 {
300 unsigned ch1 = *str;
301 hash += ch0 << (ch1 & 0xf);
302
303 hash += ch1;
304 }
305 else
306 hash += ch0;
307 }
308 else if (len)
309 {
310 hash = *str;
311 hash += hash << (hash & 0xf);
312 }
313 else
314 hash = 0;
315 return hash;
316
317#elif 1
318# if 0
319 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
320 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
321 first in the assignment made it noticably slower.)
322
323 However, it is noticably slower in practice, most likely because of more
324 collisions. Hrmpf. */
325
326# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
327 unsigned int hash = 0;
328
329# else
330 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
331 fast than return_STRING_N_HASH_1.
332
333 Ditto. */
334
335# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
336 unsigned int hash = 5381;
337# endif
338
339
340 while (len >= 4)
341 {
342 UPDATE_HASH (str[0]);
343 UPDATE_HASH (str[1]);
344 UPDATE_HASH (str[2]);
345 UPDATE_HASH (str[3]);
346 str += 4;
347 len -= 4;
348 }
349 switch (len)
350 {
351 default:
352 case 0:
353 return hash;
354 case 1:
355 UPDATE_HASH (str[0]);
356 return hash;
357 case 2:
358 UPDATE_HASH (str[0]);
359 UPDATE_HASH (str[1]);
360 return hash;
361 case 3:
362 UPDATE_HASH (str[0]);
363 UPDATE_HASH (str[1]);
364 UPDATE_HASH (str[2]);
365 return hash;
366 }
367#endif
368}
369
370MY_INLINE unsigned int
371strcache2_case_insensitive_hash (const char *str, unsigned int len)
372{
373 unsigned int hash = 0;
374 if (MY_PREDICT_TRUE(len >= 2))
375 {
376 unsigned int ch0 = *str++;
377 ch0 = tolower (ch0);
378 hash = 0;
379 len--;
380 while (len >= 2)
381 {
382 unsigned int ch1 = *str++;
383 ch1 = tolower (ch1);
384 hash += ch0 << (ch1 & 0xf);
385
386 ch0 = *str++;
387 ch0 = tolower (ch0);
388 hash += ch1 << (ch0 & 0xf);
389
390 len -= 2;
391 }
392 if (len == 1)
393 {
394 unsigned ch1 = *str;
395 ch1 = tolower (ch1);
396 hash += ch0 << (ch1 & 0xf);
397
398 hash += ch1;
399 }
400 else
401 hash += ch0;
402 }
403 else if (len)
404 {
405 hash = *str;
406 hash += hash << (hash & 0xf);
407 }
408 else
409 hash = 0;
410 return hash;
411}
412
413MY_INLINE int
414strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
415{
416 if (length <= 8)
417 {
418 /* short string compare - ~50% of the kBuild calls. */
419 assert ( !((size_t)ys & 3) );
420 if (!((size_t)xs & 3))
421 {
422 /* aligned */
423 int result;
424 switch (length)
425 {
426 default: /* memcmp for longer strings */
427 return memcmp (xs, ys, length);
428 case 8:
429 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
430 result |= *(int32_t*)xs - *(int32_t*)ys;
431 return result;
432 case 7:
433 result = xs[6] - ys[6];
434 result |= xs[5] - ys[5];
435 result |= xs[4] - ys[4];
436 result |= *(int32_t*)xs - *(int32_t*)ys;
437 return result;
438 case 6:
439 result = xs[5] - ys[5];
440 result |= xs[4] - ys[4];
441 result |= *(int32_t*)xs - *(int32_t*)ys;
442 return result;
443 case 5:
444 result = xs[4] - ys[4];
445 result |= *(int32_t*)xs - *(int32_t*)ys;
446 return result;
447 case 4:
448 return *(int32_t*)xs - *(int32_t*)ys;
449 case 3:
450 result = xs[2] - ys[2];
451 result |= xs[1] - ys[1];
452 result |= xs[0] - ys[0];
453 return result;
454 case 2:
455 result = xs[1] - ys[1];
456 result |= xs[0] - ys[0];
457 return result;
458 case 1:
459 return *xs - *ys;
460 case 0:
461 return 0;
462 }
463 }
464 else
465 {
466 /* unaligned */
467 int result = 0;
468 switch (length)
469 {
470 case 8: result |= xs[7] - ys[7];
471 case 7: result |= xs[6] - ys[6];
472 case 6: result |= xs[5] - ys[5];
473 case 5: result |= xs[4] - ys[4];
474 case 4: result |= xs[3] - ys[3];
475 case 3: result |= xs[2] - ys[2];
476 case 2: result |= xs[1] - ys[1];
477 case 1: result |= xs[0] - ys[0];
478 case 0:
479 return result;
480 }
481 }
482 }
483
484 /* memcmp for longer strings */
485 return memcmp (xs, ys, length);
486}
487
488MY_INLINE int
489strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
490{
491#ifndef ELECTRIC_HEAP
492 assert ( !((size_t)ys & 3) );
493#endif
494 if (!((size_t)xs & 3))
495 {
496 int result;
497 /* aligned */
498 while (length >= 8)
499 {
500 result = *(int32_t*)xs - *(int32_t*)ys;
501 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
502 if (MY_PREDICT_FALSE(result))
503 return result;
504 xs += 8;
505 ys += 8;
506 length -= 8;
507 }
508 switch (length)
509 {
510 case 7:
511 result = *(int32_t*)xs - *(int32_t*)ys;
512 result |= xs[6] - ys[6];
513 result |= xs[5] - ys[5];
514 result |= xs[4] - ys[4];
515 return result;
516 case 6:
517 result = *(int32_t*)xs - *(int32_t*)ys;
518 result |= xs[5] - ys[5];
519 result |= xs[4] - ys[4];
520 return result;
521 case 5:
522 result = *(int32_t*)xs - *(int32_t*)ys;
523 result |= xs[4] - ys[4];
524 return result;
525 case 4:
526 return *(int32_t*)xs - *(int32_t*)ys;
527 case 3:
528 result = xs[2] - ys[2];
529 result |= xs[1] - ys[1];
530 result |= xs[0] - ys[0];
531 return result;
532 case 2:
533 result = xs[1] - ys[1];
534 result |= xs[0] - ys[0];
535 return result;
536 case 1:
537 return *xs - *ys;
538 default:
539 case 0:
540 return 0;
541 }
542 }
543 else
544 {
545 /* unaligned */
546 int result;
547 while (length >= 8)
548 {
549#if defined(__i386__) || defined(__x86_64__)
550 result = ( ((int32_t)xs[3] << 24)
551 | ((int32_t)xs[2] << 16)
552 | ((int32_t)xs[1] << 8)
553 | xs[0] )
554 - *(int32_t*)ys;
555 result |= ( ((int32_t)xs[7] << 24)
556 | ((int32_t)xs[6] << 16)
557 | ((int32_t)xs[5] << 8)
558 | xs[4] )
559 - *(int32_t*)(ys + 4);
560#else
561 result = xs[3] - ys[3];
562 result |= xs[2] - ys[2];
563 result |= xs[1] - ys[1];
564 result |= xs[0] - ys[0];
565 result |= xs[7] - ys[7];
566 result |= xs[6] - ys[6];
567 result |= xs[5] - ys[5];
568 result |= xs[4] - ys[4];
569#endif
570 if (MY_PREDICT_FALSE(result))
571 return result;
572 xs += 8;
573 ys += 8;
574 length -= 8;
575 }
576 result = 0;
577 switch (length)
578 {
579 case 7: result |= xs[6] - ys[6];
580 case 6: result |= xs[5] - ys[5];
581 case 5: result |= xs[4] - ys[4];
582 case 4: result |= xs[3] - ys[3];
583 case 3: result |= xs[2] - ys[2];
584 case 2: result |= xs[1] - ys[1];
585 case 1: result |= xs[0] - ys[0];
586 return result;
587 default:
588 case 0:
589 return 0;
590 }
591 }
592}
593
594MY_INLINE int
595strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
596 const char *str, unsigned int length, unsigned int hash)
597{
598 assert (!cache->case_insensitive);
599
600 /* the simple stuff first. */
601 if ( entry->hash != hash
602 || entry->length != length)
603 return 0;
604
605#if 0
606 return memcmp (str, entry + 1, length) == 0;
607#elif 1
608 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
609#else
610 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
611#endif
612}
613
614MY_INLINE int
615strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
616 const char *str, unsigned int length, unsigned int hash)
617{
618 assert (cache->case_insensitive);
619
620 /* the simple stuff first. */
621 if ( entry->hash != hash
622 || entry->length != length)
623 return 0;
624
625#if defined(_MSC_VER) || defined(__OS2__)
626 return _memicmp (entry + 1, str, length) == 0;
627#else
628 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
629#endif
630}
631
632static void
633strcache2_rehash (struct strcache2 *cache)
634{
635 unsigned int src = cache->hash_size;
636 struct strcache2_entry **src_tab = cache->hash_tab;
637 struct strcache2_entry **dst_tab;
638#ifndef STRCACHE2_USE_MASK
639 unsigned int hash_shift;
640#endif
641
642 /* Allocate a new hash table twice the size of the current. */
643 cache->hash_size <<= 1;
644#ifdef STRCACHE2_USE_MASK
645 cache->hash_mask <<= 1;
646 cache->hash_mask |= 1;
647#else
648 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
649 /* nothing */;
650 cache->hash_div = strcache2_find_prime (hash_shift);
651#endif
652 cache->rehash_count <<= 1;
653 cache->hash_tab = dst_tab = (struct strcache2_entry **)
654 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
655 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
656
657 /* Copy the entries from the old to the new hash table. */
658 cache->collision_count = 0;
659 while (src-- > 0)
660 {
661 struct strcache2_entry *entry = src_tab[src];
662 while (entry)
663 {
664 struct strcache2_entry *next = entry->next;
665 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
666 if ((entry->next = dst_tab[dst]) != 0)
667 cache->collision_count++;
668 dst_tab[dst] = entry;
669
670 entry = next;
671 }
672 }
673
674 /* That's it, just free the old table and we're done. */
675 free (src_tab);
676}
677
678static struct strcache2_seg *
679strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
680{
681 struct strcache2_seg *seg;
682 size_t size;
683 size_t off;
684
685 size = cache->def_seg_size;
686 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
687 {
688 size = (size_t)minlen * 2;
689 size = (size + 0xfff) & ~(size_t)0xfff;
690 }
691
692 seg = xmalloc (size);
693 seg->start = (char *)(seg + 1);
694 seg->size = size - sizeof (struct strcache2_seg);
695 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
696 if (off)
697 {
698 off = STRCACHE2_ENTRY_ALIGNMENT - off;
699 seg->start += off;
700 seg->size -= off;
701 }
702 assert (seg->size > minlen);
703 seg->cursor = seg->start;
704 seg->avail = seg->size;
705
706 seg->next = cache->seg_head;
707 cache->seg_head = seg;
708
709 return seg;
710}
711
712/* Internal worker that enters a new string into the cache. */
713static const char *
714strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
715 const char *str, unsigned int length,
716 unsigned int hash)
717{
718 struct strcache2_entry *entry;
719 struct strcache2_seg *seg;
720 unsigned int size;
721 char *str_copy;
722
723 /* Allocate space for the string. */
724
725 size = length + 1 + sizeof (struct strcache2_entry);
726 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
727
728 seg = cache->seg_head;
729 if (MY_PREDICT_FALSE(seg->avail < size))
730 {
731 do
732 seg = seg->next;
733 while (seg && seg->avail < size);
734 if (!seg)
735 seg = strcache2_new_seg (cache, size);
736 }
737
738 entry = (struct strcache2_entry *) seg->cursor;
739 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
740 seg->cursor += size;
741 seg->avail -= size;
742
743 /* Setup the entry, copy the string and insert it into the hash table. */
744
745 entry->user = NULL;
746 entry->length = length;
747 entry->hash = hash;
748 str_copy = (char *) memcpy (entry + 1, str, length);
749 str_copy[length] = '\0';
750
751 if ((entry->next = cache->hash_tab[idx]) != 0)
752 cache->collision_count++;
753 cache->hash_tab[idx] = entry;
754 cache->count++;
755 if (cache->count >= cache->rehash_count)
756 strcache2_rehash (cache);
757
758 return str_copy;
759}
760
761/* The public add string interface. */
762const char *
763strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
764{
765 struct strcache2_entry const *entry;
766 unsigned int hash = strcache2_case_sensitive_hash (str, length);
767 unsigned int idx;
768
769 assert (!cache->case_insensitive);
770 assert (!memchr (str, '\0', length));
771
772 MAKE_STATS (cache->lookup_count++);
773
774 /* Lookup the entry in the hash table, hoping for an
775 early match. If not found, enter the string at IDX. */
776 idx = STRCACHE2_MOD_IT (cache, hash);
777 entry = cache->hash_tab[idx];
778 if (!entry)
779 return strcache2_enter_string (cache, idx, str, length, hash);
780 if (strcache2_is_equal (cache, entry, str, length, hash))
781 return (const char *)(entry + 1);
782 MAKE_STATS (cache->collision_1st_count++);
783
784 entry = entry->next;
785 if (!entry)
786 return strcache2_enter_string (cache, idx, str, length, hash);
787 if (strcache2_is_equal (cache, entry, str, length, hash))
788 return (const char *)(entry + 1);
789 MAKE_STATS (cache->collision_2nd_count++);
790
791 /* Loop the rest. */
792 for (;;)
793 {
794 entry = entry->next;
795 if (!entry)
796 return strcache2_enter_string (cache, idx, str, length, hash);
797 if (strcache2_is_equal (cache, entry, str, length, hash))
798 return (const char *)(entry + 1);
799 MAKE_STATS (cache->collision_3rd_count++);
800 }
801 /* not reached */
802}
803
804/* The public add string interface for prehashed strings.
805 Use strcache2_hash_str to calculate the hash of a string. */
806const char *
807strcache2_add_hashed (struct strcache2 *cache, const char *str,
808 unsigned int length, unsigned int hash)
809{
810 struct strcache2_entry const *entry;
811 unsigned int idx;
812#ifndef NDEBUG
813 unsigned correct_hash;
814
815 assert (!cache->case_insensitive);
816 assert (!memchr (str, '\0', length));
817 correct_hash = strcache2_case_sensitive_hash (str, length);
818 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
819#endif /* NDEBUG */
820
821 MAKE_STATS (cache->lookup_count++);
822
823 /* Lookup the entry in the hash table, hoping for an
824 early match. If not found, enter the string at IDX. */
825 idx = STRCACHE2_MOD_IT (cache, hash);
826 entry = cache->hash_tab[idx];
827 if (!entry)
828 return strcache2_enter_string (cache, idx, str, length, hash);
829 if (strcache2_is_equal (cache, entry, str, length, hash))
830 return (const char *)(entry + 1);
831 MAKE_STATS (cache->collision_1st_count++);
832
833 entry = entry->next;
834 if (!entry)
835 return strcache2_enter_string (cache, idx, str, length, hash);
836 if (strcache2_is_equal (cache, entry, str, length, hash))
837 return (const char *)(entry + 1);
838 MAKE_STATS (cache->collision_2nd_count++);
839
840 /* Loop the rest. */
841 for (;;)
842 {
843 entry = entry->next;
844 if (!entry)
845 return strcache2_enter_string (cache, idx, str, length, hash);
846 if (strcache2_is_equal (cache, entry, str, length, hash))
847 return (const char *)(entry + 1);
848 MAKE_STATS (cache->collision_3rd_count++);
849 }
850 /* not reached */
851}
852
853/* The public lookup (case sensitive) string interface. */
854const char *
855strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
856{
857 struct strcache2_entry const *entry;
858 unsigned int hash = strcache2_case_sensitive_hash (str, length);
859 unsigned int idx;
860
861 assert (!cache->case_insensitive);
862 assert (!memchr (str, '\0', length));
863
864 MAKE_STATS (cache->lookup_count++);
865
866 /* Lookup the entry in the hash table, hoping for an
867 early match. */
868 idx = STRCACHE2_MOD_IT (cache, hash);
869 entry = cache->hash_tab[idx];
870 if (!entry)
871 return NULL;
872 if (strcache2_is_equal (cache, entry, str, length, hash))
873 return (const char *)(entry + 1);
874 MAKE_STATS (cache->collision_1st_count++);
875
876 entry = entry->next;
877 if (!entry)
878 return NULL;
879 if (strcache2_is_equal (cache, entry, str, length, hash))
880 return (const char *)(entry + 1);
881 MAKE_STATS (cache->collision_2nd_count++);
882
883 /* Loop the rest. */
884 for (;;)
885 {
886 entry = entry->next;
887 if (!entry)
888 return NULL;
889 if (strcache2_is_equal (cache, entry, str, length, hash))
890 return (const char *)(entry + 1);
891 MAKE_STATS (cache->collision_3rd_count++);
892 }
893 /* not reached */
894}
895
896#if defined(HAVE_CASE_INSENSITIVE_FS)
897
898/* The public add string interface for case insensitive strings. */
899const char *
900strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
901{
902 struct strcache2_entry const *entry;
903 unsigned int hash = strcache2_case_insensitive_hash (str, length);
904 unsigned int idx;
905
906 assert (cache->case_insensitive);
907 assert (!memchr (str, '\0', length));
908
909 MAKE_STATS (cache->lookup_count++);
910
911 /* Lookup the entry in the hash table, hoping for an
912 early match. If not found, enter the string at IDX. */
913 idx = STRCACHE2_MOD_IT (cache, hash);
914 entry = cache->hash_tab[idx];
915 if (!entry)
916 return strcache2_enter_string (cache, idx, str, length, hash);
917 if (strcache2_is_equal (cache, entry, str, length, hash))
918 return (const char *)(entry + 1);
919 MAKE_STATS (cache->collision_1st_count++);
920
921 entry = entry->next;
922 if (!entry)
923 return strcache2_enter_string (cache, idx, str, length, hash);
924 if (strcache2_is_equal (cache, entry, str, length, hash))
925 return (const char *)(entry + 1);
926 MAKE_STATS (cache->collision_2nd_count++);
927
928 /* Loop the rest. */
929 for (;;)
930 {
931 entry = entry->next;
932 if (!entry)
933 return strcache2_enter_string (cache, idx, str, length, hash);
934 if (strcache2_is_equal (cache, entry, str, length, hash))
935 return (const char *)(entry + 1);
936 MAKE_STATS (cache->collision_3rd_count++);
937 }
938 /* not reached */
939}
940
941/* The public add string interface for prehashed case insensitive strings.
942 Use strcache2_hash_istr to calculate the hash of a string. */
943const char *
944strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
945 unsigned int length, unsigned int hash)
946{
947 struct strcache2_entry const *entry;
948 unsigned int idx;
949#ifndef NDEBUG
950 unsigned correct_hash;
951
952 assert (cache->case_insensitive);
953 assert (!memchr (str, '\0', length));
954 correct_hash = strcache2_case_insensitive_hash (str, length);
955 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
956#endif /* NDEBUG */
957
958 MAKE_STATS (cache->lookup_count++);
959
960 /* Lookup the entry in the hash table, hoping for an
961 early match. If not found, enter the string at IDX. */
962 idx = STRCACHE2_MOD_IT (cache, hash);
963 entry = cache->hash_tab[idx];
964 if (!entry)
965 return strcache2_enter_string (cache, idx, str, length, hash);
966 if (strcache2_is_equal (cache, entry, str, length, hash))
967 return (const char *)(entry + 1);
968 MAKE_STATS (cache->collision_1st_count++);
969
970 entry = entry->next;
971 if (!entry)
972 return strcache2_enter_string (cache, idx, str, length, hash);
973 if (strcache2_is_equal (cache, entry, str, length, hash))
974 return (const char *)(entry + 1);
975 MAKE_STATS (cache->collision_2nd_count++);
976
977 /* Loop the rest. */
978 for (;;)
979 {
980 entry = entry->next;
981 if (!entry)
982 return strcache2_enter_string (cache, idx, str, length, hash);
983 if (strcache2_is_equal (cache, entry, str, length, hash))
984 return (const char *)(entry + 1);
985 MAKE_STATS (cache->collision_3rd_count++);
986 }
987 /* not reached */
988}
989
990/* The public lookup (case insensitive) string interface. */
991const char *
992strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
993{
994 struct strcache2_entry const *entry;
995 unsigned int hash = strcache2_case_insensitive_hash (str, length);
996 unsigned int idx;
997
998 assert (cache->case_insensitive);
999 assert (!memchr (str, '\0', length));
1000
1001 MAKE_STATS (cache->lookup_count++);
1002
1003 /* Lookup the entry in the hash table, hoping for an
1004 early match. */
1005 idx = STRCACHE2_MOD_IT (cache, hash);
1006 entry = cache->hash_tab[idx];
1007 if (!entry)
1008 return NULL;
1009 if (strcache2_is_equal (cache, entry, str, length, hash))
1010 return (const char *)(entry + 1);
1011 MAKE_STATS (cache->collision_1st_count++);
1012
1013 entry = entry->next;
1014 if (!entry)
1015 return NULL;
1016 if (strcache2_is_equal (cache, entry, str, length, hash))
1017 return (const char *)(entry + 1);
1018 MAKE_STATS (cache->collision_2nd_count++);
1019
1020 /* Loop the rest. */
1021 for (;;)
1022 {
1023 entry = entry->next;
1024 if (!entry)
1025 return NULL;
1026 if (strcache2_is_equal (cache, entry, str, length, hash))
1027 return (const char *)(entry + 1);
1028 MAKE_STATS (cache->collision_3rd_count++);
1029 }
1030 /* not reached */
1031}
1032
1033#endif /* HAVE_CASE_INSENSITIVE_FS */
1034
1035/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1036int
1037strcache2_is_cached (struct strcache2 *cache, const char *str)
1038{
1039 /* Check mandatory alignment first. */
1040 if (!((size_t)str & (sizeof (void *) - 1)))
1041 {
1042 /* Check the segment list and consider the question answered if the
1043 string is within one of them. (Could check it more thoroughly...) */
1044 struct strcache2_seg const *seg;
1045 for (seg = cache->seg_head; seg; seg = seg->next)
1046 if ((size_t)(str - seg->start) < seg->size)
1047 return 1;
1048 }
1049
1050 return 0;
1051}
1052
1053
1054/* Verify the integrity of the specified string, returning 0 if OK. */
1055int
1056strcache2_verify_entry (struct strcache2 *cache, const char *str)
1057{
1058 struct strcache2_entry const *entry;
1059 unsigned int hash;
1060 unsigned int length;
1061 const char *end;
1062
1063 entry = (struct strcache2_entry const *)str - 1;
1064 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1065 {
1066 fprintf (stderr,
1067 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1068 cache->name, (void *)entry, (void *)str, str);
1069 return -1;
1070 }
1071
1072 end = memchr (str, '\0', entry->length + 1);
1073 length = end - str;
1074 if (length != entry->length)
1075 {
1076 fprintf (stderr,
1077 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1078 cache->name, (void *)entry, length, entry->length, str);
1079 return -1;
1080 }
1081
1082 hash = cache->case_insensitive
1083 ? strcache2_case_insensitive_hash (str, entry->length)
1084 : strcache2_case_sensitive_hash (str, entry->length);
1085 if (hash != entry->hash)
1086 {
1087 fprintf (stderr,
1088 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1089 cache->name, (void *)entry, hash, entry->hash, str);
1090 return -1;
1091 }
1092
1093 return 0;
1094}
1095
1096
1097/* Calculates the case sensitive hash values of the string.
1098 The first hash is returned, the other is put at HASH2P. */
1099unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1100{
1101 *hash2p = 1;
1102 return strcache2_case_sensitive_hash (str, length);
1103}
1104
1105/* Calculates the case insensitive hash values of the string.
1106 The first hash is returned, the other is put at HASH2P. */
1107unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1108{
1109 *hash2p = 1;
1110 return strcache2_case_insensitive_hash (str, length);
1111}
1112
1113
1114
1115/* Initalizes a new cache. */
1116void
1117strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1118 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1119{
1120 unsigned hash_shift;
1121 assert (!thread_safe);
1122
1123 /* calc the size as a power of two */
1124 if (!size)
1125 hash_shift = STRCACHE2_HASH_SHIFT;
1126 else
1127 {
1128 assert (size <= (~0U / 2 + 1));
1129 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1130 /* nothing */;
1131 }
1132
1133 /* adjust the default segment size */
1134 if (!def_seg_size)
1135 def_seg_size = STRCACHE2_SEG_SIZE;
1136 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1137 def_seg_size = sizeof (struct strcache2_seg) * 10;
1138 else if ((def_seg_size & 0xfff) < 0xf00)
1139 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1140
1141
1142 /* init the structure. */
1143 cache->case_insensitive = case_insensitive;
1144#ifdef STRCACHE2_USE_MASK
1145 cache->hash_mask = (1U << hash_shift) - 1U;
1146#else
1147 cache->hash_div = strcache2_find_prime(hash_shift);
1148#endif
1149 cache->count = 0;
1150 cache->collision_count = 0;
1151 cache->lookup_count = 0;
1152 cache->collision_1st_count = 0;
1153 cache->collision_2nd_count = 0;
1154 cache->collision_3rd_count = 0;
1155 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1156 cache->init_size = 1U << hash_shift;
1157 cache->hash_size = 1U << hash_shift;
1158 cache->def_seg_size = def_seg_size;
1159 cache->lock = NULL;
1160 cache->name = name;
1161
1162 /* allocate the hash table and first segment. */
1163 cache->hash_tab = (struct strcache2_entry **)
1164 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1165 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1166 strcache2_new_seg (cache, 0);
1167
1168 /* link it */
1169 cache->next = strcache_head;
1170 strcache_head = cache;
1171}
1172
1173
1174/* Terminates a string cache, freeing all memory and other
1175 associated resources. */
1176void
1177strcache2_term (struct strcache2 *cache)
1178{
1179 /* unlink it */
1180 if (strcache_head == cache)
1181 strcache_head = cache->next;
1182 else
1183 {
1184 struct strcache2 *prev = strcache_head;
1185 while (prev->next != cache)
1186 prev = prev->next;
1187 assert (prev);
1188 prev->next = cache->next;
1189 }
1190
1191 /* free the memory segments */
1192 do
1193 {
1194 void *free_it = cache->seg_head;
1195 cache->seg_head = cache->seg_head->next;
1196 free (free_it);
1197 }
1198 while (cache->seg_head);
1199
1200 /* free the hash and clear the structure. */
1201 free (cache->hash_tab);
1202 memset (cache, '\0', sizeof (struct strcache2));
1203}
1204
1205/* Print statistics a string cache. */
1206void
1207strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1208{
1209 unsigned int seg_count = 0;
1210 unsigned long seg_total_bytes = 0;
1211 unsigned long seg_avg_bytes;
1212 unsigned long seg_avail_bytes = 0;
1213 unsigned long seg_max_bytes = 0;
1214 struct strcache2_seg *seg;
1215 unsigned int str_count = 0;
1216 unsigned long str_total_len = 0;
1217 unsigned int str_avg_len;
1218 unsigned int str_min_len = ~0U;
1219 unsigned int str_max_len = 0;
1220 unsigned int idx;
1221 unsigned int rehashes;
1222 unsigned int chain_depths[32];
1223
1224 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1225
1226 /* Segment statistics. */
1227 for (seg = cache->seg_head; seg; seg = seg->next)
1228 {
1229 seg_count++;
1230 seg_total_bytes += seg->size;
1231 seg_avail_bytes += seg->avail;
1232 if (seg->size > seg_max_bytes)
1233 seg_max_bytes = seg->size;
1234 }
1235 seg_avg_bytes = seg_total_bytes / seg_count;
1236 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1237 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1238 cache->def_seg_size, seg_avail_bytes);
1239
1240 /* String statistics. */
1241 memset (chain_depths, '\0', sizeof (chain_depths));
1242 idx = cache->hash_size;
1243 while (idx-- > 0)
1244 {
1245 struct strcache2_entry const *entry = cache->hash_tab[idx];
1246 unsigned int depth = 0;
1247 for (; entry != 0; entry = entry->next, depth++)
1248 {
1249 unsigned int length = entry->length;
1250 str_total_len += length;
1251 if (length > str_max_len)
1252 str_max_len = length;
1253 if (length < str_min_len)
1254 str_min_len = length;
1255 str_count++;
1256 }
1257 chain_depths[depth >= 32 ? 31 : depth]++;
1258 }
1259 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1260 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1261 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1262 if (str_count != cache->count)
1263 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1264 cache->count, str_count);
1265
1266 /* Hash statistics. */
1267 idx = cache->init_size;
1268 rehashes = 0;
1269 while (idx < cache->hash_size)
1270 {
1271 idx *= 2;
1272 rehashes++;
1273 }
1274
1275#ifdef STRCACHE2_USE_MASK
1276 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1277 prefix, cache->hash_size, cache->hash_mask, rehashes);
1278#else
1279 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1280 prefix, cache->hash_size, cache->hash_div, rehashes);
1281#endif
1282 if (cache->lookup_count)
1283 printf (_("%s lookups = %lu\n"
1284 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1285 prefix, cache->lookup_count,
1286 prefix,
1287 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1288 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1289 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1290 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1291 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1292 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1293 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1294 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1295 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1296 for (idx = 2; idx < 32; idx++)
1297 {
1298 unsigned strs_at_this_depth = chain_depths[idx];
1299 unsigned i;
1300 for (i = idx + 1; i < 32; i++)
1301 strs_at_this_depth += chain_depths[i];
1302 if (strs_at_this_depth)
1303 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1304 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1305 idx - 1, idx == 2 ? " " : "s",
1306 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1307 }
1308}
1309
1310/* Print statistics for all string caches. */
1311void
1312strcache2_print_stats_all (const char *prefix)
1313{
1314 struct strcache2 *cur;
1315 for (cur = strcache_head; cur; cur = cur->next)
1316 strcache2_print_stats (cur, prefix);
1317}
1318
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette