strcache2.c@ 2912

Last change on this file since 2912 was 2799, checked in by bird, 10 years ago
Fixed glob.h inclusion issue causing stack corruption. Fixed alignment issue in the string expansion compiler. More makefile eval 'compiler' work.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 38.5 KB

Line
1	/* $Id: strcache2.c 2799 2015-09-19 20:36:31Z bird $ */
2	/** @file
3	* strcache2 - New string cache.
4	*/
5
6	/*
7	* Copyright (c) 2008-2010 knut st. osmundsen <[email protected]>
8	*
9	* This file is part of kBuild.
10	*
11	* kBuild is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 3 of the License, or
14	* (at your option) any later version.
15	*
16	* kBuild is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with kBuild. If not, see <http://www.gnu.org/licenses/>
23	*
24	*/
25
26	/*******************************************************************************
27	* Header Files *
28	*******************************************************************************/
29	#include "make.h"
30	#include "strcache2.h"
31
32	#include <assert.h>
33
34	#include "debug.h"
35
36	#ifdef _MSC_VER
37	typedef unsigned char uint8_t;
38	typedef unsigned short uint16_t;
39	typedef unsigned int uint32_t;
40	typedef signed char int8_t;
41	typedef signed short int16_t;
42	typedef signed int int32_t;
43	#else
44	# include <stdint.h>
45	#endif
46
47	#ifdef WINDOWS32
48	# include <io.h>
49	# include <process.h>
50	# include <Windows.h>
51	# define PARSE_IN_WORKER
52	#endif
53
54	#ifdef __OS2__
55	# include <sys/fmutex.h>
56	#endif
57
58	#ifdef HAVE_PTHREAD
59	# include <pthread.h>
60	#endif
61
62
63	/*******************************************************************************
64	* Defined Constants And Macros *
65	*******************************************************************************/
66	/* The default size of a memory segment (1MB). */
67	#define STRCACHE2_SEG_SIZE (1024U*1024U)
68	/* The default hash table shift (hash size give as a power of two). */
69	#define STRCACHE2_HASH_SHIFT 16
70	/** Does the modding / masking of a hash number into an index. */
71	#ifdef STRCACHE2_USE_MASK
72	# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73	#else
74	# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75	#endif
76
77	# if ( defined(__amd64__) \|\| defined(__x86_64__) \|\| defined(__AMD64__) \|\| defined(_M_X64) \|\| defined(__amd64) \
78	\|\| defined(__i386__) \|\| defined(__x86__) \|\| defined(__X86__) \|\| defined(_M_IX86) \|\| defined(__i386)) \
79	&& !defined(GCC_ADDRESS_SANITIZER)
80	# define strcache2_get_unaligned_16bits(ptr) ( ((const uint16_t )(ptr)))
81	# else
82	/* (endian doesn't matter) */
83	# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
84	\| (((const uint8_t *)(ptr))[1]) )
85	# endif
86
87
88	/*******************************************************************************
89	* Global Variables *
90	*******************************************************************************/
91	/* List of initialized string caches. */
92	static struct strcache2 *strcache_head;
93
94
95	/** Finds the closest primary number for power of two value (or something else
96	* useful if not support). */
97	MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
98	{
99	switch (shift)
100	{
101	case 5: return 31;
102	case 6: return 61;
103	case 7: return 127;
104	case 8: return 251;
105	case 9: return 509;
106	case 10: return 1021;
107	case 11: return 2039;
108	case 12: return 4093;
109	case 13: return 8191;
110	case 14: return 16381;
111	case 15: return 32749;
112	case 16: return 65521;
113	case 17: return 131063;
114	case 18: return 262139;
115	case 19: return 524269;
116	case 20: return 1048573;
117	case 21: return 2097143;
118	case 22: return 4194301;
119	case 23: return 8388593;
120
121	default:
122	assert (0);
123	return (1 << shift) - 1;
124	}
125	}
126
127	/* The following is a bit experiment. It produces longer chains, i.e. worse
128	distribution of the strings in the table, however the actual make
129	performances is better (<time). The explanation is probably that the
130	collisions only really increase for entries that aren't looked up that
131	much and that it actually improoves the situation for those that is. Or
132	that we spend so much less time hashing that it makes up (and more) for
133	the pentalty we suffer from the longer chains and worse distribution.
134
135	XXX: Check how this works out with different path lengths. I suspect it
136	might depend on the length of PATH_ROOT and the depth of the files
137	in the project as well. If it does, this might make matters worse
138	for some and better for others which isn't very cool... */
139
140	#if 0
141	# define BIG_HASH_SIZE 32 /* kinda fast */
142	# define BIG_HASH_HEAD 16
143	# define BIG_HASH_TAIL 12
144	#elif 0
145	# define BIG_HASH_SIZE 68 /* kinda safe */
146	# define BIG_HASH_HEAD 24
147	# define BIG_HASH_TAIL 24
148	#elif 0
149	# define BIG_HASH_SIZE 128 /* safe */
150	# define BIG_HASH_HEAD 32
151	# define BIG_HASH_TAIL 32
152	#endif
153
154	#ifdef BIG_HASH_SIZE
155	/* long string: hash head and tail, drop the middle. */
156	MY_INLINE unsigned int
157	strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
158	{
159	uint32_t hash = len;
160	uint32_t tmp;
161	unsigned int head;
162
163	/* head BIG_HASH_HEAD bytes */
164	head = (BIG_HASH_HEAD >> 2);
165	while (head-- > 0)
166	{
167	hash += strcache2_get_unaligned_16bits (str);
168	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
169	hash = (hash << 16) ^ tmp;
170	str += 2 * sizeof (uint16_t);
171	hash += hash >> 11;
172	}
173
174	/* tail BIG_HASH_TAIL bytes (minus the odd ones) */
175	str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
176	head = (BIG_HASH_TAIL >> 2);
177	while (head-- > 0)
178	{
179	hash += strcache2_get_unaligned_16bits (str);
180	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
181	hash = (hash << 16) ^ tmp;
182	str += 2 * sizeof (uint16_t);
183	hash += hash >> 11;
184	}
185
186	/* force "avalanching" of final 127 bits. */
187	hash ^= hash << 3;
188	hash += hash >> 5;
189	hash ^= hash << 4;
190	hash += hash >> 17;
191	hash ^= hash << 25;
192	hash += hash >> 6;
193
194	return hash;
195	}
196	#endif /* BIG_HASH_SIZE */
197
198	MY_INLINE unsigned int
199	strcache2_case_sensitive_hash (const char *str, unsigned int len)
200	{
201	#if 1
202	/* Paul Hsieh hash SuperFast function:
203	http://www.azillionmonkeys.com/qed/hash.html
204
205	This performs very good and as a sligtly better distribution than
206	STRING_N_HASH_1 on a typical kBuild run.
207
208	It is also 37% faster than return_STRING_N_HASH_1 when running the
209	two 100 times over typical kBuild strings that end up here (did a
210	fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
211	with -O2.
212
213	FIXME: A path for well aligned data should be added to speed up
214	execution on alignment sensitive systems. */
215	unsigned int rem;
216	uint32_t hash;
217	uint32_t tmp;
218
219	assert (sizeof (uint8_t) == sizeof (char));
220
221	# ifdef BIG_HASH_SIZE
222	/* long string? */
223	# if 0 /BIG_HASH_SIZE > 128/
224	if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
225	# else
226	if (len >= BIG_HASH_SIZE)
227	# endif
228	return strcache2_case_sensitive_hash_big (str, len);
229	# endif
230
231	/* short string: main loop, walking on 2 x uint16_t */
232	hash = len;
233	rem = len & 3;
234	len >>= 2;
235	while (len > 0)
236	{
237	hash += strcache2_get_unaligned_16bits (str);
238	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
239	hash = (hash << 16) ^ tmp;
240	str += 2 * sizeof (uint16_t);
241	hash += hash >> 11;
242	len--;
243	}
244
245	/* the remainder */
246	switch (rem)
247	{
248	case 3:
249	hash += strcache2_get_unaligned_16bits (str);
250	hash ^= hash << 16;
251	hash ^= str[sizeof (uint16_t)] << 18;
252	hash += hash >> 11;
253	break;
254	case 2:
255	hash += strcache2_get_unaligned_16bits (str);
256	hash ^= hash << 11;
257	hash += hash >> 17;
258	break;
259	case 1:
260	hash += *str;
261	hash ^= hash << 10;
262	hash += hash >> 1;
263	break;
264	}
265
266	/* force "avalanching" of final 127 bits. */
267	hash ^= hash << 3;
268	hash += hash >> 5;
269	hash ^= hash << 4;
270	hash += hash >> 17;
271	hash ^= hash << 25;
272	hash += hash >> 6;
273
274	return hash;
275
276	#elif 1
277	/* Note! This implementation is 18% faster than return_STRING_N_HASH_1
278	when running the two 100 times over typical kBuild strings that
279	end up here (did a fprintf here and built kBuild).
280	Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
281
282	unsigned int hash = 0;
283	if (MY_PREDICT_TRUE(len >= 2))
284	{
285	unsigned int ch0 = *str++;
286	hash = 0;
287	len--;
288	while (len >= 2)
289	{
290	unsigned int ch1 = *str++;
291	hash += ch0 << (ch1 & 0xf);
292
293	ch0 = *str++;
294	hash += ch1 << (ch0 & 0xf);
295
296	len -= 2;
297	}
298	if (len == 1)
299	{
300	unsigned ch1 = *str;
301	hash += ch0 << (ch1 & 0xf);
302
303	hash += ch1;
304	}
305	else
306	hash += ch0;
307	}
308	else if (len)
309	{
310	hash = *str;
311	hash += hash << (hash & 0xf);
312	}
313	else
314	hash = 0;
315	return hash;
316
317	#elif 1
318	# if 0
319	/* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
320	than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
321	first in the assignment made it noticably slower.)
322
323	However, it is noticably slower in practice, most likely because of more
324	collisions. Hrmpf. */
325
326	# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
327	unsigned int hash = 0;
328
329	# else
330	/* This is DJB2. This specific form/unroll was benchmarked to be 27%
331	fast than return_STRING_N_HASH_1.
332
333	Ditto. */
334
335	# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
336	unsigned int hash = 5381;
337	# endif
338
339
340	while (len >= 4)
341	{
342	UPDATE_HASH (str[0]);
343	UPDATE_HASH (str[1]);
344	UPDATE_HASH (str[2]);
345	UPDATE_HASH (str[3]);
346	str += 4;
347	len -= 4;
348	}
349	switch (len)
350	{
351	default:
352	case 0:
353	return hash;
354	case 1:
355	UPDATE_HASH (str[0]);
356	return hash;
357	case 2:
358	UPDATE_HASH (str[0]);
359	UPDATE_HASH (str[1]);
360	return hash;
361	case 3:
362	UPDATE_HASH (str[0]);
363	UPDATE_HASH (str[1]);
364	UPDATE_HASH (str[2]);
365	return hash;
366	}
367	#endif
368	}
369
370	MY_INLINE unsigned int
371	strcache2_case_insensitive_hash (const char *str, unsigned int len)
372	{
373	unsigned int hash = 0;
374	if (MY_PREDICT_TRUE(len >= 2))
375	{
376	unsigned int ch0 = *str++;
377	ch0 = tolower (ch0);
378	hash = 0;
379	len--;
380	while (len >= 2)
381	{
382	unsigned int ch1 = *str++;
383	ch1 = tolower (ch1);
384	hash += ch0 << (ch1 & 0xf);
385
386	ch0 = *str++;
387	ch0 = tolower (ch0);
388	hash += ch1 << (ch0 & 0xf);
389
390	len -= 2;
391	}
392	if (len == 1)
393	{
394	unsigned ch1 = *str;
395	ch1 = tolower (ch1);
396	hash += ch0 << (ch1 & 0xf);
397
398	hash += ch1;
399	}
400	else
401	hash += ch0;
402	}
403	else if (len)
404	{
405	hash = *str;
406	hash += hash << (hash & 0xf);
407	}
408	else
409	hash = 0;
410	return hash;
411	}
412
413	MY_INLINE int
414	strcache2_memcmp_inline_short (const char xs, const char ys, unsigned int length)
415	{
416	if (length <= 8)
417	{
418	/* short string compare - ~50% of the kBuild calls. */
419	assert ( !((size_t)ys & 3) );
420	if (!((size_t)xs & 3))
421	{
422	/* aligned */
423	int result;
424	switch (length)
425	{
426	default: /* memcmp for longer strings */
427	return memcmp (xs, ys, length);
428	case 8:
429	result = (int32_t)(xs + 4) - (int32_t)(ys + 4);
430	result \|= (int32_t)xs - (int32_t)ys;
431	return result;
432	case 7:
433	result = xs[6] - ys[6];
434	result \|= xs[5] - ys[5];
435	result \|= xs[4] - ys[4];
436	result \|= (int32_t)xs - (int32_t)ys;
437	return result;
438	case 6:
439	result = xs[5] - ys[5];
440	result \|= xs[4] - ys[4];
441	result \|= (int32_t)xs - (int32_t)ys;
442	return result;
443	case 5:
444	result = xs[4] - ys[4];
445	result \|= (int32_t)xs - (int32_t)ys;
446	return result;
447	case 4:
448	return (int32_t)xs - (int32_t)ys;
449	case 3:
450	result = xs[2] - ys[2];
451	result \|= xs[1] - ys[1];
452	result \|= xs[0] - ys[0];
453	return result;
454	case 2:
455	result = xs[1] - ys[1];
456	result \|= xs[0] - ys[0];
457	return result;
458	case 1:
459	return xs - ys;
460	case 0:
461	return 0;
462	}
463	}
464	else
465	{
466	/* unaligned */
467	int result = 0;
468	switch (length)
469	{
470	case 8: result \|= xs[7] - ys[7];
471	case 7: result \|= xs[6] - ys[6];
472	case 6: result \|= xs[5] - ys[5];
473	case 5: result \|= xs[4] - ys[4];
474	case 4: result \|= xs[3] - ys[3];
475	case 3: result \|= xs[2] - ys[2];
476	case 2: result \|= xs[1] - ys[1];
477	case 1: result \|= xs[0] - ys[0];
478	case 0:
479	return result;
480	}
481	}
482	}
483
484	/* memcmp for longer strings */
485	return memcmp (xs, ys, length);
486	}
487
488	MY_INLINE int
489	strcache2_memcmp_inlined (const char xs, const char ys, unsigned int length)
490	{
491	#ifndef ELECTRIC_HEAP
492	assert ( !((size_t)ys & 3) );
493	#endif
494	if (!((size_t)xs & 3))
495	{
496	int result;
497	/* aligned */
498	while (length >= 8)
499	{
500	result = (int32_t)xs - (int32_t)ys;
501	result \|= (int32_t)(xs + 4) - (int32_t)(ys + 4);
502	if (MY_PREDICT_FALSE(result))
503	return result;
504	xs += 8;
505	ys += 8;
506	length -= 8;
507	}
508	switch (length)
509	{
510	case 7:
511	result = (int32_t)xs - (int32_t)ys;
512	result \|= xs[6] - ys[6];
513	result \|= xs[5] - ys[5];
514	result \|= xs[4] - ys[4];
515	return result;
516	case 6:
517	result = (int32_t)xs - (int32_t)ys;
518	result \|= xs[5] - ys[5];
519	result \|= xs[4] - ys[4];
520	return result;
521	case 5:
522	result = (int32_t)xs - (int32_t)ys;
523	result \|= xs[4] - ys[4];
524	return result;
525	case 4:
526	return (int32_t)xs - (int32_t)ys;
527	case 3:
528	result = xs[2] - ys[2];
529	result \|= xs[1] - ys[1];
530	result \|= xs[0] - ys[0];
531	return result;
532	case 2:
533	result = xs[1] - ys[1];
534	result \|= xs[0] - ys[0];
535	return result;
536	case 1:
537	return xs - ys;
538	default:
539	case 0:
540	return 0;
541	}
542	}
543	else
544	{
545	/* unaligned */
546	int result;
547	while (length >= 8)
548	{
549	#if defined(__i386__) \|\| defined(__x86_64__)
550	result = ( ((int32_t)xs[3] << 24)
551	\| ((int32_t)xs[2] << 16)
552	\| ((int32_t)xs[1] << 8)
553	\| xs[0] )
554	- (int32_t)ys;
555	result \|= ( ((int32_t)xs[7] << 24)
556	\| ((int32_t)xs[6] << 16)
557	\| ((int32_t)xs[5] << 8)
558	\| xs[4] )
559	- (int32_t)(ys + 4);
560	#else
561	result = xs[3] - ys[3];
562	result \|= xs[2] - ys[2];
563	result \|= xs[1] - ys[1];
564	result \|= xs[0] - ys[0];
565	result \|= xs[7] - ys[7];
566	result \|= xs[6] - ys[6];
567	result \|= xs[5] - ys[5];
568	result \|= xs[4] - ys[4];
569	#endif
570	if (MY_PREDICT_FALSE(result))
571	return result;
572	xs += 8;
573	ys += 8;
574	length -= 8;
575	}
576	result = 0;
577	switch (length)
578	{
579	case 7: result \|= xs[6] - ys[6];
580	case 6: result \|= xs[5] - ys[5];
581	case 5: result \|= xs[4] - ys[4];
582	case 4: result \|= xs[3] - ys[3];
583	case 3: result \|= xs[2] - ys[2];
584	case 2: result \|= xs[1] - ys[1];
585	case 1: result \|= xs[0] - ys[0];
586	return result;
587	default:
588	case 0:
589	return 0;
590	}
591	}
592	}
593
594	MY_INLINE int
595	strcache2_is_equal (struct strcache2 cache, struct strcache2_entry const entry,
596	const char *str, unsigned int length, unsigned int hash)
597	{
598	assert (!cache->case_insensitive);
599
600	/* the simple stuff first. */
601	if ( entry->hash != hash
602	\|\| entry->length != length)
603	return 0;
604
605	#if 0
606	return memcmp (str, entry + 1, length) == 0;
607	#elif 1
608	return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
609	#else
610	return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
611	#endif
612	}
613
614	MY_INLINE int
615	strcache2_is_iequal (struct strcache2 cache, struct strcache2_entry const entry,
616	const char *str, unsigned int length, unsigned int hash)
617	{
618	assert (cache->case_insensitive);
619
620	/* the simple stuff first. */
621	if ( entry->hash != hash
622	\|\| entry->length != length)
623	return 0;
624
625	#if defined(_MSC_VER) \|\| defined(__OS2__)
626	return _memicmp (entry + 1, str, length) == 0;
627	#else
628	return strncasecmp ((const char *)(entry + 1), str, length) == 0;
629	#endif
630	}
631
632	static void
633	strcache2_rehash (struct strcache2 *cache)
634	{
635	unsigned int src = cache->hash_size;
636	struct strcache2_entry **src_tab = cache->hash_tab;
637	struct strcache2_entry **dst_tab;
638	#ifndef STRCACHE2_USE_MASK
639	unsigned int hash_shift;
640	#endif
641
642	/* Allocate a new hash table twice the size of the current. */
643	cache->hash_size <<= 1;
644	#ifdef STRCACHE2_USE_MASK
645	cache->hash_mask <<= 1;
646	cache->hash_mask \|= 1;
647	#else
648	for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
649	/* nothing */;
650	cache->hash_div = strcache2_find_prime (hash_shift);
651	#endif
652	cache->rehash_count <<= 1;
653	cache->hash_tab = dst_tab = (struct strcache2_entry **)
654	xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
655	memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
656
657	/* Copy the entries from the old to the new hash table. */
658	cache->collision_count = 0;
659	while (src-- > 0)
660	{
661	struct strcache2_entry *entry = src_tab[src];
662	while (entry)
663	{
664	struct strcache2_entry *next = entry->next;
665	unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
666	if ((entry->next = dst_tab[dst]) != 0)
667	cache->collision_count++;
668	dst_tab[dst] = entry;
669
670	entry = next;
671	}
672	}
673
674	/* That's it, just free the old table and we're done. */
675	free (src_tab);
676	}
677
678	static struct strcache2_seg *
679	strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
680	{
681	struct strcache2_seg *seg;
682	size_t size;
683	size_t off;
684
685	size = cache->def_seg_size;
686	if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
687	{
688	size = (size_t)minlen * 2;
689	size = (size + 0xfff) & ~(size_t)0xfff;
690	}
691
692	seg = xmalloc (size);
693	seg->start = (char *)(seg + 1);
694	seg->size = size - sizeof (struct strcache2_seg);
695	off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
696	if (off)
697	{
698	off = STRCACHE2_ENTRY_ALIGNMENT - off;
699	seg->start += off;
700	seg->size -= off;
701	}
702	assert (seg->size > minlen);
703	seg->cursor = seg->start;
704	seg->avail = seg->size;
705
706	seg->next = cache->seg_head;
707	cache->seg_head = seg;
708
709	return seg;
710	}
711
712	/* Internal worker that enters a new string into the cache. */
713	static const char *
714	strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
715	const char *str, unsigned int length,
716	unsigned int hash)
717	{
718	struct strcache2_entry *entry;
719	struct strcache2_seg *seg;
720	unsigned int size;
721	char *str_copy;
722
723	/* Allocate space for the string. */
724
725	size = length + 1 + sizeof (struct strcache2_entry);
726	size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
727
728	seg = cache->seg_head;
729	if (MY_PREDICT_FALSE(seg->avail < size))
730	{
731	do
732	seg = seg->next;
733	while (seg && seg->avail < size);
734	if (!seg)
735	seg = strcache2_new_seg (cache, size);
736	}
737
738	entry = (struct strcache2_entry *) seg->cursor;
739	assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
740	seg->cursor += size;
741	seg->avail -= size;
742
743	/* Setup the entry, copy the string and insert it into the hash table. */
744
745	entry->user = NULL;
746	entry->length = length;
747	entry->hash = hash;
748	str_copy = (char *) memcpy (entry + 1, str, length);
749	str_copy[length] = '\0';
750
751	if ((entry->next = cache->hash_tab[idx]) != 0)
752	cache->collision_count++;
753	cache->hash_tab[idx] = entry;
754	cache->count++;
755	if (cache->count >= cache->rehash_count)
756	strcache2_rehash (cache);
757
758	return str_copy;
759	}
760
761	/* The public add string interface. */
762	const char *
763	strcache2_add (struct strcache2 cache, const char str, unsigned int length)
764	{
765	struct strcache2_entry const *entry;
766	unsigned int hash = strcache2_case_sensitive_hash (str, length);
767	unsigned int idx;
768
769	assert (!cache->case_insensitive);
770	assert (!memchr (str, '\0', length));
771
772	MAKE_STATS (cache->lookup_count++);
773
774	/* Lookup the entry in the hash table, hoping for an
775	early match. If not found, enter the string at IDX. */
776	idx = STRCACHE2_MOD_IT (cache, hash);
777	entry = cache->hash_tab[idx];
778	if (!entry)
779	return strcache2_enter_string (cache, idx, str, length, hash);
780	if (strcache2_is_equal (cache, entry, str, length, hash))
781	return (const char *)(entry + 1);
782	MAKE_STATS (cache->collision_1st_count++);
783
784	entry = entry->next;
785	if (!entry)
786	return strcache2_enter_string (cache, idx, str, length, hash);
787	if (strcache2_is_equal (cache, entry, str, length, hash))
788	return (const char *)(entry + 1);
789	MAKE_STATS (cache->collision_2nd_count++);
790
791	/* Loop the rest. */
792	for (;;)
793	{
794	entry = entry->next;
795	if (!entry)
796	return strcache2_enter_string (cache, idx, str, length, hash);
797	if (strcache2_is_equal (cache, entry, str, length, hash))
798	return (const char *)(entry + 1);
799	MAKE_STATS (cache->collision_3rd_count++);
800	}
801	/* not reached */
802	}
803
804	/* The public add string interface for prehashed strings.
805	Use strcache2_hash_str to calculate the hash of a string. */
806	const char *
807	strcache2_add_hashed (struct strcache2 cache, const char str,
808	unsigned int length, unsigned int hash)
809	{
810	struct strcache2_entry const *entry;
811	unsigned int idx;
812	#ifndef NDEBUG
813	unsigned correct_hash;
814
815	assert (!cache->case_insensitive);
816	assert (!memchr (str, '\0', length));
817	correct_hash = strcache2_case_sensitive_hash (str, length);
818	MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
819	#endif /* NDEBUG */
820
821	MAKE_STATS (cache->lookup_count++);
822
823	/* Lookup the entry in the hash table, hoping for an
824	early match. If not found, enter the string at IDX. */
825	idx = STRCACHE2_MOD_IT (cache, hash);
826	entry = cache->hash_tab[idx];
827	if (!entry)
828	return strcache2_enter_string (cache, idx, str, length, hash);
829	if (strcache2_is_equal (cache, entry, str, length, hash))
830	return (const char *)(entry + 1);
831	MAKE_STATS (cache->collision_1st_count++);
832
833	entry = entry->next;
834	if (!entry)
835	return strcache2_enter_string (cache, idx, str, length, hash);
836	if (strcache2_is_equal (cache, entry, str, length, hash))
837	return (const char *)(entry + 1);
838	MAKE_STATS (cache->collision_2nd_count++);
839
840	/* Loop the rest. */
841	for (;;)
842	{
843	entry = entry->next;
844	if (!entry)
845	return strcache2_enter_string (cache, idx, str, length, hash);
846	if (strcache2_is_equal (cache, entry, str, length, hash))
847	return (const char *)(entry + 1);
848	MAKE_STATS (cache->collision_3rd_count++);
849	}
850	/* not reached */
851	}
852
853	/* The public lookup (case sensitive) string interface. */
854	const char *
855	strcache2_lookup (struct strcache2 cache, const char str, unsigned int length)
856	{
857	struct strcache2_entry const *entry;
858	unsigned int hash = strcache2_case_sensitive_hash (str, length);
859	unsigned int idx;
860
861	assert (!cache->case_insensitive);
862	assert (!memchr (str, '\0', length));
863
864	MAKE_STATS (cache->lookup_count++);
865
866	/* Lookup the entry in the hash table, hoping for an
867	early match. */
868	idx = STRCACHE2_MOD_IT (cache, hash);
869	entry = cache->hash_tab[idx];
870	if (!entry)
871	return NULL;
872	if (strcache2_is_equal (cache, entry, str, length, hash))
873	return (const char *)(entry + 1);
874	MAKE_STATS (cache->collision_1st_count++);
875
876	entry = entry->next;
877	if (!entry)
878	return NULL;
879	if (strcache2_is_equal (cache, entry, str, length, hash))
880	return (const char *)(entry + 1);
881	MAKE_STATS (cache->collision_2nd_count++);
882
883	/* Loop the rest. */
884	for (;;)
885	{
886	entry = entry->next;
887	if (!entry)
888	return NULL;
889	if (strcache2_is_equal (cache, entry, str, length, hash))
890	return (const char *)(entry + 1);
891	MAKE_STATS (cache->collision_3rd_count++);
892	}
893	/* not reached */
894	}
895
896	#if defined(HAVE_CASE_INSENSITIVE_FS)
897
898	/* The public add string interface for case insensitive strings. */
899	const char *
900	strcache2_iadd (struct strcache2 cache, const char str, unsigned int length)
901	{
902	struct strcache2_entry const *entry;
903	unsigned int hash = strcache2_case_insensitive_hash (str, length);
904	unsigned int idx;
905
906	assert (cache->case_insensitive);
907	assert (!memchr (str, '\0', length));
908
909	MAKE_STATS (cache->lookup_count++);
910
911	/* Lookup the entry in the hash table, hoping for an
912	early match. If not found, enter the string at IDX. */
913	idx = STRCACHE2_MOD_IT (cache, hash);
914	entry = cache->hash_tab[idx];
915	if (!entry)
916	return strcache2_enter_string (cache, idx, str, length, hash);
917	if (strcache2_is_equal (cache, entry, str, length, hash))
918	return (const char *)(entry + 1);
919	MAKE_STATS (cache->collision_1st_count++);
920
921	entry = entry->next;
922	if (!entry)
923	return strcache2_enter_string (cache, idx, str, length, hash);
924	if (strcache2_is_equal (cache, entry, str, length, hash))
925	return (const char *)(entry + 1);
926	MAKE_STATS (cache->collision_2nd_count++);
927
928	/* Loop the rest. */
929	for (;;)
930	{
931	entry = entry->next;
932	if (!entry)
933	return strcache2_enter_string (cache, idx, str, length, hash);
934	if (strcache2_is_equal (cache, entry, str, length, hash))
935	return (const char *)(entry + 1);
936	MAKE_STATS (cache->collision_3rd_count++);
937	}
938	/* not reached */
939	}
940
941	/* The public add string interface for prehashed case insensitive strings.
942	Use strcache2_hash_istr to calculate the hash of a string. */
943	const char *
944	strcache2_iadd_hashed (struct strcache2 cache, const char str,
945	unsigned int length, unsigned int hash)
946	{
947	struct strcache2_entry const *entry;
948	unsigned int idx;
949	#ifndef NDEBUG
950	unsigned correct_hash;
951
952	assert (cache->case_insensitive);
953	assert (!memchr (str, '\0', length));
954	correct_hash = strcache2_case_insensitive_hash (str, length);
955	MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
956	#endif /* NDEBUG */
957
958	MAKE_STATS (cache->lookup_count++);
959
960	/* Lookup the entry in the hash table, hoping for an
961	early match. If not found, enter the string at IDX. */
962	idx = STRCACHE2_MOD_IT (cache, hash);
963	entry = cache->hash_tab[idx];
964	if (!entry)
965	return strcache2_enter_string (cache, idx, str, length, hash);
966	if (strcache2_is_equal (cache, entry, str, length, hash))
967	return (const char *)(entry + 1);
968	MAKE_STATS (cache->collision_1st_count++);
969
970	entry = entry->next;
971	if (!entry)
972	return strcache2_enter_string (cache, idx, str, length, hash);
973	if (strcache2_is_equal (cache, entry, str, length, hash))
974	return (const char *)(entry + 1);
975	MAKE_STATS (cache->collision_2nd_count++);
976
977	/* Loop the rest. */
978	for (;;)
979	{
980	entry = entry->next;
981	if (!entry)
982	return strcache2_enter_string (cache, idx, str, length, hash);
983	if (strcache2_is_equal (cache, entry, str, length, hash))
984	return (const char *)(entry + 1);
985	MAKE_STATS (cache->collision_3rd_count++);
986	}
987	/* not reached */
988	}
989
990	/* The public lookup (case insensitive) string interface. */
991	const char *
992	strcache2_ilookup (struct strcache2 cache, const char str, unsigned int length)
993	{
994	struct strcache2_entry const *entry;
995	unsigned int hash = strcache2_case_insensitive_hash (str, length);
996	unsigned int idx;
997
998	assert (cache->case_insensitive);
999	assert (!memchr (str, '\0', length));
1000
1001	MAKE_STATS (cache->lookup_count++);
1002
1003	/* Lookup the entry in the hash table, hoping for an
1004	early match. */
1005	idx = STRCACHE2_MOD_IT (cache, hash);
1006	entry = cache->hash_tab[idx];
1007	if (!entry)
1008	return NULL;
1009	if (strcache2_is_equal (cache, entry, str, length, hash))
1010	return (const char *)(entry + 1);
1011	MAKE_STATS (cache->collision_1st_count++);
1012
1013	entry = entry->next;
1014	if (!entry)
1015	return NULL;
1016	if (strcache2_is_equal (cache, entry, str, length, hash))
1017	return (const char *)(entry + 1);
1018	MAKE_STATS (cache->collision_2nd_count++);
1019
1020	/* Loop the rest. */
1021	for (;;)
1022	{
1023	entry = entry->next;
1024	if (!entry)
1025	return NULL;
1026	if (strcache2_is_equal (cache, entry, str, length, hash))
1027	return (const char *)(entry + 1);
1028	MAKE_STATS (cache->collision_3rd_count++);
1029	}
1030	/* not reached */
1031	}
1032
1033	#endif /* HAVE_CASE_INSENSITIVE_FS */
1034
1035	/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1036	int
1037	strcache2_is_cached (struct strcache2 cache, const char str)
1038	{
1039	/* Check mandatory alignment first. */
1040	if (!((size_t)str & (sizeof (void *) - 1)))
1041	{
1042	/* Check the segment list and consider the question answered if the
1043	string is within one of them. (Could check it more thoroughly...) */
1044	struct strcache2_seg const *seg;
1045	for (seg = cache->seg_head; seg; seg = seg->next)
1046	if ((size_t)(str - seg->start) < seg->size)
1047	return 1;
1048	}
1049
1050	return 0;
1051	}
1052
1053
1054	/* Verify the integrity of the specified string, returning 0 if OK. */
1055	int
1056	strcache2_verify_entry (struct strcache2 cache, const char str)
1057	{
1058	struct strcache2_entry const *entry;
1059	unsigned int hash;
1060	unsigned int length;
1061	const char *end;
1062
1063	entry = (struct strcache2_entry const *)str - 1;
1064	if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1065	{
1066	fprintf (stderr,
1067	"strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1068	cache->name, (void )entry, (void )str, str);
1069	return -1;
1070	}
1071
1072	end = memchr (str, '\0', entry->length + 1);
1073	length = end - str;
1074	if (length != entry->length)
1075	{
1076	fprintf (stderr,
1077	"strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1078	cache->name, (void *)entry, length, entry->length, str);
1079	return -1;
1080	}
1081
1082	hash = cache->case_insensitive
1083	? strcache2_case_insensitive_hash (str, entry->length)
1084	: strcache2_case_sensitive_hash (str, entry->length);
1085	if (hash != entry->hash)
1086	{
1087	fprintf (stderr,
1088	"strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1089	cache->name, (void *)entry, hash, entry->hash, str);
1090	return -1;
1091	}
1092
1093	return 0;
1094	}
1095
1096
1097	/* Calculates the case sensitive hash values of the string.
1098	The first hash is returned, the other is put at HASH2P. */
1099	unsigned int strcache2_hash_str (const char str, unsigned int length, unsigned int hash2p)
1100	{
1101	*hash2p = 1;
1102	return strcache2_case_sensitive_hash (str, length);
1103	}
1104
1105	/* Calculates the case insensitive hash values of the string.
1106	The first hash is returned, the other is put at HASH2P. */
1107	unsigned int strcache2_hash_istr (const char str, unsigned int length, unsigned int hash2p)
1108	{
1109	*hash2p = 1;
1110	return strcache2_case_insensitive_hash (str, length);
1111	}
1112
1113
1114
1115	/* Initalizes a new cache. */
1116	void
1117	strcache2_init (struct strcache2 cache, const char name, unsigned int size,
1118	unsigned int def_seg_size, int case_insensitive, int thread_safe)
1119	{
1120	unsigned hash_shift;
1121	assert (!thread_safe);
1122
1123	/* calc the size as a power of two */
1124	if (!size)
1125	hash_shift = STRCACHE2_HASH_SHIFT;
1126	else
1127	{
1128	assert (size <= (~0U / 2 + 1));
1129	for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1130	/* nothing */;
1131	}
1132
1133	/* adjust the default segment size */
1134	if (!def_seg_size)
1135	def_seg_size = STRCACHE2_SEG_SIZE;
1136	else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1137	def_seg_size = sizeof (struct strcache2_seg) * 10;
1138	else if ((def_seg_size & 0xfff) < 0xf00)
1139	def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1140
1141
1142	/* init the structure. */
1143	cache->case_insensitive = case_insensitive;
1144	#ifdef STRCACHE2_USE_MASK
1145	cache->hash_mask = (1U << hash_shift) - 1U;
1146	#else
1147	cache->hash_div = strcache2_find_prime(hash_shift);
1148	#endif
1149	cache->count = 0;
1150	cache->collision_count = 0;
1151	cache->lookup_count = 0;
1152	cache->collision_1st_count = 0;
1153	cache->collision_2nd_count = 0;
1154	cache->collision_3rd_count = 0;
1155	cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1156	cache->init_size = 1U << hash_shift;
1157	cache->hash_size = 1U << hash_shift;
1158	cache->def_seg_size = def_seg_size;
1159	cache->lock = NULL;
1160	cache->name = name;
1161
1162	/* allocate the hash table and first segment. */
1163	cache->hash_tab = (struct strcache2_entry **)
1164	xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1165	memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1166	strcache2_new_seg (cache, 0);
1167
1168	/* link it */
1169	cache->next = strcache_head;
1170	strcache_head = cache;
1171	}
1172
1173
1174	/* Terminates a string cache, freeing all memory and other
1175	associated resources. */
1176	void
1177	strcache2_term (struct strcache2 *cache)
1178	{
1179	/* unlink it */
1180	if (strcache_head == cache)
1181	strcache_head = cache->next;
1182	else
1183	{
1184	struct strcache2 *prev = strcache_head;
1185	while (prev->next != cache)
1186	prev = prev->next;
1187	assert (prev);
1188	prev->next = cache->next;
1189	}
1190
1191	/* free the memory segments */
1192	do
1193	{
1194	void *free_it = cache->seg_head;
1195	cache->seg_head = cache->seg_head->next;
1196	free (free_it);
1197	}
1198	while (cache->seg_head);
1199
1200	/* free the hash and clear the structure. */
1201	free (cache->hash_tab);
1202	memset (cache, '\0', sizeof (struct strcache2));
1203	}
1204
1205	/* Print statistics a string cache. */
1206	void
1207	strcache2_print_stats (struct strcache2 cache, const char prefix)
1208	{
1209	unsigned int seg_count = 0;
1210	unsigned long seg_total_bytes = 0;
1211	unsigned long seg_avg_bytes;
1212	unsigned long seg_avail_bytes = 0;
1213	unsigned long seg_max_bytes = 0;
1214	struct strcache2_seg *seg;
1215	unsigned int str_count = 0;
1216	unsigned long str_total_len = 0;
1217	unsigned int str_avg_len;
1218	unsigned int str_min_len = ~0U;
1219	unsigned int str_max_len = 0;
1220	unsigned int idx;
1221	unsigned int rehashes;
1222	unsigned int chain_depths[32];
1223
1224	printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1225
1226	/* Segment statistics. */
1227	for (seg = cache->seg_head; seg; seg = seg->next)
1228	{
1229	seg_count++;
1230	seg_total_bytes += seg->size;
1231	seg_avail_bytes += seg->avail;
1232	if (seg->size > seg_max_bytes)
1233	seg_max_bytes = seg->size;
1234	}
1235	seg_avg_bytes = seg_total_bytes / seg_count;
1236	printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1237	prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1238	cache->def_seg_size, seg_avail_bytes);
1239
1240	/* String statistics. */
1241	memset (chain_depths, '\0', sizeof (chain_depths));
1242	idx = cache->hash_size;
1243	while (idx-- > 0)
1244	{
1245	struct strcache2_entry const *entry = cache->hash_tab[idx];
1246	unsigned int depth = 0;
1247	for (; entry != 0; entry = entry->next, depth++)
1248	{
1249	unsigned int length = entry->length;
1250	str_total_len += length;
1251	if (length > str_max_len)
1252	str_max_len = length;
1253	if (length < str_min_len)
1254	str_min_len = length;
1255	str_count++;
1256	}
1257	chain_depths[depth >= 32 ? 31 : depth]++;
1258	}
1259	str_avg_len = cache->count ? str_total_len / cache->count : 0;
1260	printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1261	prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1262	if (str_count != cache->count)
1263	printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1264	cache->count, str_count);
1265
1266	/* Hash statistics. */
1267	idx = cache->init_size;
1268	rehashes = 0;
1269	while (idx < cache->hash_size)
1270	{
1271	idx *= 2;
1272	rehashes++;
1273	}
1274
1275	#ifdef STRCACHE2_USE_MASK
1276	printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1277	prefix, cache->hash_size, cache->hash_mask, rehashes);
1278	#else
1279	printf (_("%s hash size = %u div = %#x rehashed %u times"),
1280	prefix, cache->hash_size, cache->hash_div, rehashes);
1281	#endif
1282	if (cache->lookup_count)
1283	printf (_("%s lookups = %lu\n"
1284	"%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1285	prefix, cache->lookup_count,
1286	prefix,
1287	cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1288	cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1289	cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1290	printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1291	prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1292	printf (_("%s %5u (%u%%) empty hash table slots\n"),
1293	prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1294	printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1295	prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1296	for (idx = 2; idx < 32; idx++)
1297	{
1298	unsigned strs_at_this_depth = chain_depths[idx];
1299	unsigned i;
1300	for (i = idx + 1; i < 32; i++)
1301	strs_at_this_depth += chain_depths[i];
1302	if (strs_at_this_depth)
1303	printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1304	prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1305	idx - 1, idx == 2 ? " " : "s",
1306	strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1307	}
1308	}
1309
1310	/* Print statistics for all string caches. */
1311	void
1312	strcache2_print_stats_all (const char *prefix)
1313	{
1314	struct strcache2 *cur;
1315	for (cur = strcache_head; cur; cur = cur->next)
1316	strcache2_print_stats (cur, prefix);
1317	}
1318

Note: See TracBrowser for help on using the repository browser.

source: kBuild/trunk/src/kmk/strcache2.c@ 2912

Download in other formats: