strcache2.c@ 2657

Last change on this file since 2657 was 2413, checked in by bird, 14 years ago
copyright year update.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 38.5 KB

Line
1	/* $Id: strcache2.c 2413 2010-09-11 17:43:04Z bird $ */
2	/** @file
3	* strcache2 - New string cache.
4	*/
5
6	/*
7	* Copyright (c) 2008-2010 knut st. osmundsen <[email protected]>
8	*
9	* This file is part of kBuild.
10	*
11	* kBuild is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 3 of the License, or
14	* (at your option) any later version.
15	*
16	* kBuild is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with kBuild. If not, see <http://www.gnu.org/licenses/>
23	*
24	*/
25
26	/*******************************************************************************
27	* Header Files *
28	*******************************************************************************/
29	#include "make.h"
30	#include "strcache2.h"
31
32	#include <assert.h>
33
34	#include "debug.h"
35
36	#ifdef _MSC_VER
37	typedef unsigned char uint8_t;
38	typedef unsigned short uint16_t;
39	typedef unsigned int uint32_t;
40	typedef signed char int8_t;
41	typedef signed short int16_t;
42	typedef signed int int32_t;
43	#else
44	# include <stdint.h>
45	#endif
46
47	#ifdef WINDOWS32
48	# include <io.h>
49	# include <process.h>
50	# include <Windows.h>
51	# define PARSE_IN_WORKER
52	#endif
53
54	#ifdef __OS2__
55	# include <sys/fmutex.h>
56	#endif
57
58	#ifdef HAVE_PTHREAD
59	# include <pthread.h>
60	#endif
61
62
63	/*******************************************************************************
64	* Defined Constants And Macros *
65	*******************************************************************************/
66	/* The default size of a memory segment (1MB). */
67	#define STRCACHE2_SEG_SIZE (1024U*1024U)
68	/* The default hash table shift (hash size give as a power of two). */
69	#define STRCACHE2_HASH_SHIFT 16
70	/** Does the modding / masking of a hash number into an index. */
71	#ifdef STRCACHE2_USE_MASK
72	# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73	#else
74	# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75	#endif
76
77	# if defined(__amd64__) \|\| defined(__x86_64__) \|\| defined(__AMD64__) \|\| defined(_M_X64) \|\| defined(__amd64) \
78	\|\| defined(__i386__) \|\| defined(__x86__) \|\| defined(__X86__) \|\| defined(_M_IX86) \|\| defined(__i386)
79	# define strcache2_get_unaligned_16bits(ptr) ( ((const uint16_t )(ptr)))
80	# else
81	/* (endian doesn't matter) */
82	# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
83	\| (((const uint8_t *)(ptr))[1]) )
84	# endif
85
86
87	/*******************************************************************************
88	* Global Variables *
89	*******************************************************************************/
90	/* List of initialized string caches. */
91	static struct strcache2 *strcache_head;
92
93
94	/** Finds the closest primary number for power of two value (or something else
95	* useful if not support). */
96	MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
97	{
98	switch (shift)
99	{
100	case 5: return 31;
101	case 6: return 61;
102	case 7: return 127;
103	case 8: return 251;
104	case 9: return 509;
105	case 10: return 1021;
106	case 11: return 2039;
107	case 12: return 4093;
108	case 13: return 8191;
109	case 14: return 16381;
110	case 15: return 32749;
111	case 16: return 65521;
112	case 17: return 131063;
113	case 18: return 262139;
114	case 19: return 524269;
115	case 20: return 1048573;
116	case 21: return 2097143;
117	case 22: return 4194301;
118	case 23: return 8388593;
119
120	default:
121	assert (0);
122	return (1 << shift) - 1;
123	}
124	}
125
126	/* The following is a bit experiment. It produces longer chains, i.e. worse
127	distribution of the strings in the table, however the actual make
128	performances is better (<time). The explanation is probably that the
129	collisions only really increase for entries that aren't looked up that
130	much and that it actually improoves the situation for those that is. Or
131	that we spend so much less time hashing that it makes up (and more) for
132	the pentalty we suffer from the longer chains and worse distribution.
133
134	XXX: Check how this works out with different path lengths. I suspect it
135	might depend on the length of PATH_ROOT and the depth of the files
136	in the project as well. If it does, this might make matters worse
137	for some and better for others which isn't very cool... */
138
139	#if 0
140	# define BIG_HASH_SIZE 32 /* kinda fast */
141	# define BIG_HASH_HEAD 16
142	# define BIG_HASH_TAIL 12
143	#elif 0
144	# define BIG_HASH_SIZE 68 /* kinda safe */
145	# define BIG_HASH_HEAD 24
146	# define BIG_HASH_TAIL 24
147	#elif 0
148	# define BIG_HASH_SIZE 128 /* safe */
149	# define BIG_HASH_HEAD 32
150	# define BIG_HASH_TAIL 32
151	#endif
152
153	#ifdef BIG_HASH_SIZE
154	/* long string: hash head and tail, drop the middle. */
155	MY_INLINE unsigned int
156	strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
157	{
158	uint32_t hash = len;
159	uint32_t tmp;
160	unsigned int head;
161
162	/* head BIG_HASH_HEAD bytes */
163	head = (BIG_HASH_HEAD >> 2);
164	while (head-- > 0)
165	{
166	hash += strcache2_get_unaligned_16bits (str);
167	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
168	hash = (hash << 16) ^ tmp;
169	str += 2 * sizeof (uint16_t);
170	hash += hash >> 11;
171	}
172
173	/* tail BIG_HASH_TAIL bytes (minus the odd ones) */
174	str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
175	head = (BIG_HASH_TAIL >> 2);
176	while (head-- > 0)
177	{
178	hash += strcache2_get_unaligned_16bits (str);
179	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
180	hash = (hash << 16) ^ tmp;
181	str += 2 * sizeof (uint16_t);
182	hash += hash >> 11;
183	}
184
185	/* force "avalanching" of final 127 bits. */
186	hash ^= hash << 3;
187	hash += hash >> 5;
188	hash ^= hash << 4;
189	hash += hash >> 17;
190	hash ^= hash << 25;
191	hash += hash >> 6;
192
193	return hash;
194	}
195	#endif /* BIG_HASH_SIZE */
196
197	MY_INLINE unsigned int
198	strcache2_case_sensitive_hash (const char *str, unsigned int len)
199	{
200	#if 1
201	/* Paul Hsieh hash SuperFast function:
202	http://www.azillionmonkeys.com/qed/hash.html
203
204	This performs very good and as a sligtly better distribution than
205	STRING_N_HASH_1 on a typical kBuild run.
206
207	It is also 37% faster than return_STRING_N_HASH_1 when running the
208	two 100 times over typical kBuild strings that end up here (did a
209	fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
210	with -O2.
211
212	FIXME: A path for well aligned data should be added to speed up
213	execution on alignment sensitive systems. */
214	unsigned int rem;
215	uint32_t hash;
216	uint32_t tmp;
217
218	assert (sizeof (uint8_t) == sizeof (char));
219
220	# ifdef BIG_HASH_SIZE
221	/* long string? */
222	# if 0 /BIG_HASH_SIZE > 128/
223	if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
224	# else
225	if (len >= BIG_HASH_SIZE)
226	# endif
227	return strcache2_case_sensitive_hash_big (str, len);
228	# endif
229
230	/* short string: main loop, walking on 2 x uint16_t */
231	hash = len;
232	rem = len & 3;
233	len >>= 2;
234	while (len > 0)
235	{
236	hash += strcache2_get_unaligned_16bits (str);
237	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
238	hash = (hash << 16) ^ tmp;
239	str += 2 * sizeof (uint16_t);
240	hash += hash >> 11;
241	len--;
242	}
243
244	/* the remainder */
245	switch (rem)
246	{
247	case 3:
248	hash += strcache2_get_unaligned_16bits (str);
249	hash ^= hash << 16;
250	hash ^= str[sizeof (uint16_t)] << 18;
251	hash += hash >> 11;
252	break;
253	case 2:
254	hash += strcache2_get_unaligned_16bits (str);
255	hash ^= hash << 11;
256	hash += hash >> 17;
257	break;
258	case 1:
259	hash += *str;
260	hash ^= hash << 10;
261	hash += hash >> 1;
262	break;
263	}
264
265	/* force "avalanching" of final 127 bits. */
266	hash ^= hash << 3;
267	hash += hash >> 5;
268	hash ^= hash << 4;
269	hash += hash >> 17;
270	hash ^= hash << 25;
271	hash += hash >> 6;
272
273	return hash;
274
275	#elif 1
276	/* Note! This implementation is 18% faster than return_STRING_N_HASH_1
277	when running the two 100 times over typical kBuild strings that
278	end up here (did a fprintf here and built kBuild).
279	Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
280
281	unsigned int hash = 0;
282	if (MY_PREDICT_TRUE(len >= 2))
283	{
284	unsigned int ch0 = *str++;
285	hash = 0;
286	len--;
287	while (len >= 2)
288	{
289	unsigned int ch1 = *str++;
290	hash += ch0 << (ch1 & 0xf);
291
292	ch0 = *str++;
293	hash += ch1 << (ch0 & 0xf);
294
295	len -= 2;
296	}
297	if (len == 1)
298	{
299	unsigned ch1 = *str;
300	hash += ch0 << (ch1 & 0xf);
301
302	hash += ch1;
303	}
304	else
305	hash += ch0;
306	}
307	else if (len)
308	{
309	hash = *str;
310	hash += hash << (hash & 0xf);
311	}
312	else
313	hash = 0;
314	return hash;
315
316	#elif 1
317	# if 0
318	/* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
319	than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
320	first in the assignment made it noticably slower.)
321
322	However, it is noticably slower in practice, most likely because of more
323	collisions. Hrmpf. */
324
325	# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
326	unsigned int hash = 0;
327
328	# else
329	/* This is DJB2. This specific form/unroll was benchmarked to be 27%
330	fast than return_STRING_N_HASH_1.
331
332	Ditto. */
333
334	# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
335	unsigned int hash = 5381;
336	# endif
337
338
339	while (len >= 4)
340	{
341	UPDATE_HASH (str[0]);
342	UPDATE_HASH (str[1]);
343	UPDATE_HASH (str[2]);
344	UPDATE_HASH (str[3]);
345	str += 4;
346	len -= 4;
347	}
348	switch (len)
349	{
350	default:
351	case 0:
352	return hash;
353	case 1:
354	UPDATE_HASH (str[0]);
355	return hash;
356	case 2:
357	UPDATE_HASH (str[0]);
358	UPDATE_HASH (str[1]);
359	return hash;
360	case 3:
361	UPDATE_HASH (str[0]);
362	UPDATE_HASH (str[1]);
363	UPDATE_HASH (str[2]);
364	return hash;
365	}
366	#endif
367	}
368
369	MY_INLINE unsigned int
370	strcache2_case_insensitive_hash (const char *str, unsigned int len)
371	{
372	unsigned int hash = 0;
373	if (MY_PREDICT_TRUE(len >= 2))
374	{
375	unsigned int ch0 = *str++;
376	ch0 = tolower (ch0);
377	hash = 0;
378	len--;
379	while (len >= 2)
380	{
381	unsigned int ch1 = *str++;
382	ch1 = tolower (ch1);
383	hash += ch0 << (ch1 & 0xf);
384
385	ch0 = *str++;
386	ch0 = tolower (ch0);
387	hash += ch1 << (ch0 & 0xf);
388
389	len -= 2;
390	}
391	if (len == 1)
392	{
393	unsigned ch1 = *str;
394	ch1 = tolower (ch1);
395	hash += ch0 << (ch1 & 0xf);
396
397	hash += ch1;
398	}
399	else
400	hash += ch0;
401	}
402	else if (len)
403	{
404	hash = *str;
405	hash += hash << (hash & 0xf);
406	}
407	else
408	hash = 0;
409	return hash;
410	}
411
412	MY_INLINE int
413	strcache2_memcmp_inline_short (const char xs, const char ys, unsigned int length)
414	{
415	if (length <= 8)
416	{
417	/* short string compare - ~50% of the kBuild calls. */
418	assert ( !((size_t)ys & 3) );
419	if (!((size_t)xs & 3))
420	{
421	/* aligned */
422	int result;
423	switch (length)
424	{
425	default: /* memcmp for longer strings */
426	return memcmp (xs, ys, length);
427	case 8:
428	result = (int32_t)(xs + 4) - (int32_t)(ys + 4);
429	result \|= (int32_t)xs - (int32_t)ys;
430	return result;
431	case 7:
432	result = xs[6] - ys[6];
433	result \|= xs[5] - ys[5];
434	result \|= xs[4] - ys[4];
435	result \|= (int32_t)xs - (int32_t)ys;
436	return result;
437	case 6:
438	result = xs[5] - ys[5];
439	result \|= xs[4] - ys[4];
440	result \|= (int32_t)xs - (int32_t)ys;
441	return result;
442	case 5:
443	result = xs[4] - ys[4];
444	result \|= (int32_t)xs - (int32_t)ys;
445	return result;
446	case 4:
447	return (int32_t)xs - (int32_t)ys;
448	case 3:
449	result = xs[2] - ys[2];
450	result \|= xs[1] - ys[1];
451	result \|= xs[0] - ys[0];
452	return result;
453	case 2:
454	result = xs[1] - ys[1];
455	result \|= xs[0] - ys[0];
456	return result;
457	case 1:
458	return xs - ys;
459	case 0:
460	return 0;
461	}
462	}
463	else
464	{
465	/* unaligned */
466	int result = 0;
467	switch (length)
468	{
469	case 8: result \|= xs[7] - ys[7];
470	case 7: result \|= xs[6] - ys[6];
471	case 6: result \|= xs[5] - ys[5];
472	case 5: result \|= xs[4] - ys[4];
473	case 4: result \|= xs[3] - ys[3];
474	case 3: result \|= xs[2] - ys[2];
475	case 2: result \|= xs[1] - ys[1];
476	case 1: result \|= xs[0] - ys[0];
477	case 0:
478	return result;
479	}
480	}
481	}
482
483	/* memcmp for longer strings */
484	return memcmp (xs, ys, length);
485	}
486
487	MY_INLINE int
488	strcache2_memcmp_inlined (const char xs, const char ys, unsigned int length)
489	{
490	#ifndef ELECTRIC_HEAP
491	assert ( !((size_t)ys & 3) );
492	#endif
493	if (!((size_t)xs & 3))
494	{
495	int result;
496	/* aligned */
497	while (length >= 8)
498	{
499	result = (int32_t)xs - (int32_t)ys;
500	result \|= (int32_t)(xs + 4) - (int32_t)(ys + 4);
501	if (MY_PREDICT_FALSE(result))
502	return result;
503	xs += 8;
504	ys += 8;
505	length -= 8;
506	}
507	switch (length)
508	{
509	case 7:
510	result = (int32_t)xs - (int32_t)ys;
511	result \|= xs[6] - ys[6];
512	result \|= xs[5] - ys[5];
513	result \|= xs[4] - ys[4];
514	return result;
515	case 6:
516	result = (int32_t)xs - (int32_t)ys;
517	result \|= xs[5] - ys[5];
518	result \|= xs[4] - ys[4];
519	return result;
520	case 5:
521	result = (int32_t)xs - (int32_t)ys;
522	result \|= xs[4] - ys[4];
523	return result;
524	case 4:
525	return (int32_t)xs - (int32_t)ys;
526	case 3:
527	result = xs[2] - ys[2];
528	result \|= xs[1] - ys[1];
529	result \|= xs[0] - ys[0];
530	return result;
531	case 2:
532	result = xs[1] - ys[1];
533	result \|= xs[0] - ys[0];
534	return result;
535	case 1:
536	return xs - ys;
537	default:
538	case 0:
539	return 0;
540	}
541	}
542	else
543	{
544	/* unaligned */
545	int result;
546	while (length >= 8)
547	{
548	#if defined(__i386__) \|\| defined(__x86_64__)
549	result = ( ((int32_t)xs[3] << 24)
550	\| ((int32_t)xs[2] << 16)
551	\| ((int32_t)xs[1] << 8)
552	\| xs[0] )
553	- (int32_t)ys;
554	result \|= ( ((int32_t)xs[7] << 24)
555	\| ((int32_t)xs[6] << 16)
556	\| ((int32_t)xs[5] << 8)
557	\| xs[4] )
558	- (int32_t)(ys + 4);
559	#else
560	result = xs[3] - ys[3];
561	result \|= xs[2] - ys[2];
562	result \|= xs[1] - ys[1];
563	result \|= xs[0] - ys[0];
564	result \|= xs[7] - ys[7];
565	result \|= xs[6] - ys[6];
566	result \|= xs[5] - ys[5];
567	result \|= xs[4] - ys[4];
568	#endif
569	if (MY_PREDICT_FALSE(result))
570	return result;
571	xs += 8;
572	ys += 8;
573	length -= 8;
574	}
575	result = 0;
576	switch (length)
577	{
578	case 7: result \|= xs[6] - ys[6];
579	case 6: result \|= xs[5] - ys[5];
580	case 5: result \|= xs[4] - ys[4];
581	case 4: result \|= xs[3] - ys[3];
582	case 3: result \|= xs[2] - ys[2];
583	case 2: result \|= xs[1] - ys[1];
584	case 1: result \|= xs[0] - ys[0];
585	return result;
586	default:
587	case 0:
588	return 0;
589	}
590	}
591	}
592
593	MY_INLINE int
594	strcache2_is_equal (struct strcache2 cache, struct strcache2_entry const entry,
595	const char *str, unsigned int length, unsigned int hash)
596	{
597	assert (!cache->case_insensitive);
598
599	/* the simple stuff first. */
600	if ( entry->hash != hash
601	\|\| entry->length != length)
602	return 0;
603
604	#if 0
605	return memcmp (str, entry + 1, length) == 0;
606	#elif 1
607	return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
608	#else
609	return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
610	#endif
611	}
612
613	MY_INLINE int
614	strcache2_is_iequal (struct strcache2 cache, struct strcache2_entry const entry,
615	const char *str, unsigned int length, unsigned int hash)
616	{
617	assert (cache->case_insensitive);
618
619	/* the simple stuff first. */
620	if ( entry->hash != hash
621	\|\| entry->length != length)
622	return 0;
623
624	#if defined(_MSC_VER) \|\| defined(__OS2__)
625	return _memicmp (entry + 1, str, length) == 0;
626	#else
627	return strncasecmp ((const char *)(entry + 1), str, length) == 0;
628	#endif
629	}
630
631	static void
632	strcache2_rehash (struct strcache2 *cache)
633	{
634	unsigned int src = cache->hash_size;
635	struct strcache2_entry **src_tab = cache->hash_tab;
636	struct strcache2_entry **dst_tab;
637	#ifndef STRCACHE2_USE_MASK
638	unsigned int hash_shift;
639	#endif
640
641	/* Allocate a new hash table twice the size of the current. */
642	cache->hash_size <<= 1;
643	#ifdef STRCACHE2_USE_MASK
644	cache->hash_mask <<= 1;
645	cache->hash_mask \|= 1;
646	#else
647	for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
648	/* nothing */;
649	cache->hash_div = strcache2_find_prime (hash_shift);
650	#endif
651	cache->rehash_count <<= 1;
652	cache->hash_tab = dst_tab = (struct strcache2_entry **)
653	xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
654	memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
655
656	/* Copy the entries from the old to the new hash table. */
657	cache->collision_count = 0;
658	while (src-- > 0)
659	{
660	struct strcache2_entry *entry = src_tab[src];
661	while (entry)
662	{
663	struct strcache2_entry *next = entry->next;
664	unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
665	if ((entry->next = dst_tab[dst]) != 0)
666	cache->collision_count++;
667	dst_tab[dst] = entry;
668
669	entry = next;
670	}
671	}
672
673	/* That's it, just free the old table and we're done. */
674	free (src_tab);
675	}
676
677	static struct strcache2_seg *
678	strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
679	{
680	struct strcache2_seg *seg;
681	size_t size;
682	size_t off;
683
684	size = cache->def_seg_size;
685	if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
686	{
687	size = (size_t)minlen * 2;
688	size = (size + 0xfff) & ~(size_t)0xfff;
689	}
690
691	seg = xmalloc (size);
692	seg->start = (char *)(seg + 1);
693	seg->size = size - sizeof (struct strcache2_seg);
694	off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
695	if (off)
696	{
697	off = STRCACHE2_ENTRY_ALIGNMENT - off;
698	seg->start += off;
699	seg->size -= off;
700	}
701	assert (seg->size > minlen);
702	seg->cursor = seg->start;
703	seg->avail = seg->size;
704
705	seg->next = cache->seg_head;
706	cache->seg_head = seg;
707
708	return seg;
709	}
710
711	/* Internal worker that enters a new string into the cache. */
712	static const char *
713	strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
714	const char *str, unsigned int length,
715	unsigned int hash)
716	{
717	struct strcache2_entry *entry;
718	struct strcache2_seg *seg;
719	unsigned int size;
720	char *str_copy;
721
722	/* Allocate space for the string. */
723
724	size = length + 1 + sizeof (struct strcache2_entry);
725	size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
726
727	seg = cache->seg_head;
728	if (MY_PREDICT_FALSE(seg->avail < size))
729	{
730	do
731	seg = seg->next;
732	while (seg && seg->avail < size);
733	if (!seg)
734	seg = strcache2_new_seg (cache, size);
735	}
736
737	entry = (struct strcache2_entry *) seg->cursor;
738	assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
739	seg->cursor += size;
740	seg->avail -= size;
741
742	/* Setup the entry, copy the string and insert it into the hash table. */
743
744	entry->user = NULL;
745	entry->length = length;
746	entry->hash = hash;
747	str_copy = (char *) memcpy (entry + 1, str, length);
748	str_copy[length] = '\0';
749
750	if ((entry->next = cache->hash_tab[idx]) != 0)
751	cache->collision_count++;
752	cache->hash_tab[idx] = entry;
753	cache->count++;
754	if (cache->count >= cache->rehash_count)
755	strcache2_rehash (cache);
756
757	return str_copy;
758	}
759
760	/* The public add string interface. */
761	const char *
762	strcache2_add (struct strcache2 cache, const char str, unsigned int length)
763	{
764	struct strcache2_entry const *entry;
765	unsigned int hash = strcache2_case_sensitive_hash (str, length);
766	unsigned int idx;
767
768	assert (!cache->case_insensitive);
769	assert (!memchr (str, '\0', length));
770
771	MAKE_STATS (cache->lookup_count++);
772
773	/* Lookup the entry in the hash table, hoping for an
774	early match. If not found, enter the string at IDX. */
775	idx = STRCACHE2_MOD_IT (cache, hash);
776	entry = cache->hash_tab[idx];
777	if (!entry)
778	return strcache2_enter_string (cache, idx, str, length, hash);
779	if (strcache2_is_equal (cache, entry, str, length, hash))
780	return (const char *)(entry + 1);
781	MAKE_STATS (cache->collision_1st_count++);
782
783	entry = entry->next;
784	if (!entry)
785	return strcache2_enter_string (cache, idx, str, length, hash);
786	if (strcache2_is_equal (cache, entry, str, length, hash))
787	return (const char *)(entry + 1);
788	MAKE_STATS (cache->collision_2nd_count++);
789
790	/* Loop the rest. */
791	for (;;)
792	{
793	entry = entry->next;
794	if (!entry)
795	return strcache2_enter_string (cache, idx, str, length, hash);
796	if (strcache2_is_equal (cache, entry, str, length, hash))
797	return (const char *)(entry + 1);
798	MAKE_STATS (cache->collision_3rd_count++);
799	}
800	/* not reached */
801	}
802
803	/* The public add string interface for prehashed strings.
804	Use strcache2_hash_str to calculate the hash of a string. */
805	const char *
806	strcache2_add_hashed (struct strcache2 cache, const char str,
807	unsigned int length, unsigned int hash)
808	{
809	struct strcache2_entry const *entry;
810	unsigned int idx;
811	#ifndef NDEBUG
812	unsigned correct_hash;
813
814	assert (!cache->case_insensitive);
815	assert (!memchr (str, '\0', length));
816	correct_hash = strcache2_case_sensitive_hash (str, length);
817	MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
818	#endif /* NDEBUG */
819
820	MAKE_STATS (cache->lookup_count++);
821
822	/* Lookup the entry in the hash table, hoping for an
823	early match. If not found, enter the string at IDX. */
824	idx = STRCACHE2_MOD_IT (cache, hash);
825	entry = cache->hash_tab[idx];
826	if (!entry)
827	return strcache2_enter_string (cache, idx, str, length, hash);
828	if (strcache2_is_equal (cache, entry, str, length, hash))
829	return (const char *)(entry + 1);
830	MAKE_STATS (cache->collision_1st_count++);
831
832	entry = entry->next;
833	if (!entry)
834	return strcache2_enter_string (cache, idx, str, length, hash);
835	if (strcache2_is_equal (cache, entry, str, length, hash))
836	return (const char *)(entry + 1);
837	MAKE_STATS (cache->collision_2nd_count++);
838
839	/* Loop the rest. */
840	for (;;)
841	{
842	entry = entry->next;
843	if (!entry)
844	return strcache2_enter_string (cache, idx, str, length, hash);
845	if (strcache2_is_equal (cache, entry, str, length, hash))
846	return (const char *)(entry + 1);
847	MAKE_STATS (cache->collision_3rd_count++);
848	}
849	/* not reached */
850	}
851
852	/* The public lookup (case sensitive) string interface. */
853	const char *
854	strcache2_lookup (struct strcache2 cache, const char str, unsigned int length)
855	{
856	struct strcache2_entry const *entry;
857	unsigned int hash = strcache2_case_sensitive_hash (str, length);
858	unsigned int idx;
859
860	assert (!cache->case_insensitive);
861	assert (!memchr (str, '\0', length));
862
863	MAKE_STATS (cache->lookup_count++);
864
865	/* Lookup the entry in the hash table, hoping for an
866	early match. */
867	idx = STRCACHE2_MOD_IT (cache, hash);
868	entry = cache->hash_tab[idx];
869	if (!entry)
870	return NULL;
871	if (strcache2_is_equal (cache, entry, str, length, hash))
872	return (const char *)(entry + 1);
873	MAKE_STATS (cache->collision_1st_count++);
874
875	entry = entry->next;
876	if (!entry)
877	return NULL;
878	if (strcache2_is_equal (cache, entry, str, length, hash))
879	return (const char *)(entry + 1);
880	MAKE_STATS (cache->collision_2nd_count++);
881
882	/* Loop the rest. */
883	for (;;)
884	{
885	entry = entry->next;
886	if (!entry)
887	return NULL;
888	if (strcache2_is_equal (cache, entry, str, length, hash))
889	return (const char *)(entry + 1);
890	MAKE_STATS (cache->collision_3rd_count++);
891	}
892	/* not reached */
893	}
894
895	#if defined(HAVE_CASE_INSENSITIVE_FS)
896
897	/* The public add string interface for case insensitive strings. */
898	const char *
899	strcache2_iadd (struct strcache2 cache, const char str, unsigned int length)
900	{
901	struct strcache2_entry const *entry;
902	unsigned int hash = strcache2_case_insensitive_hash (str, length);
903	unsigned int idx;
904
905	assert (cache->case_insensitive);
906	assert (!memchr (str, '\0', length));
907
908	MAKE_STATS (cache->lookup_count++);
909
910	/* Lookup the entry in the hash table, hoping for an
911	early match. If not found, enter the string at IDX. */
912	idx = STRCACHE2_MOD_IT (cache, hash);
913	entry = cache->hash_tab[idx];
914	if (!entry)
915	return strcache2_enter_string (cache, idx, str, length, hash);
916	if (strcache2_is_equal (cache, entry, str, length, hash))
917	return (const char *)(entry + 1);
918	MAKE_STATS (cache->collision_1st_count++);
919
920	entry = entry->next;
921	if (!entry)
922	return strcache2_enter_string (cache, idx, str, length, hash);
923	if (strcache2_is_equal (cache, entry, str, length, hash))
924	return (const char *)(entry + 1);
925	MAKE_STATS (cache->collision_2nd_count++);
926
927	/* Loop the rest. */
928	for (;;)
929	{
930	entry = entry->next;
931	if (!entry)
932	return strcache2_enter_string (cache, idx, str, length, hash);
933	if (strcache2_is_equal (cache, entry, str, length, hash))
934	return (const char *)(entry + 1);
935	MAKE_STATS (cache->collision_3rd_count++);
936	}
937	/* not reached */
938	}
939
940	/* The public add string interface for prehashed case insensitive strings.
941	Use strcache2_hash_istr to calculate the hash of a string. */
942	const char *
943	strcache2_iadd_hashed (struct strcache2 cache, const char str,
944	unsigned int length, unsigned int hash)
945	{
946	struct strcache2_entry const *entry;
947	unsigned int idx;
948	#ifndef NDEBUG
949	unsigned correct_hash;
950
951	assert (cache->case_insensitive);
952	assert (!memchr (str, '\0', length));
953	correct_hash = strcache2_case_insensitive_hash (str, length);
954	MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
955	#endif /* NDEBUG */
956
957	MAKE_STATS (cache->lookup_count++);
958
959	/* Lookup the entry in the hash table, hoping for an
960	early match. If not found, enter the string at IDX. */
961	idx = STRCACHE2_MOD_IT (cache, hash);
962	entry = cache->hash_tab[idx];
963	if (!entry)
964	return strcache2_enter_string (cache, idx, str, length, hash);
965	if (strcache2_is_equal (cache, entry, str, length, hash))
966	return (const char *)(entry + 1);
967	MAKE_STATS (cache->collision_1st_count++);
968
969	entry = entry->next;
970	if (!entry)
971	return strcache2_enter_string (cache, idx, str, length, hash);
972	if (strcache2_is_equal (cache, entry, str, length, hash))
973	return (const char *)(entry + 1);
974	MAKE_STATS (cache->collision_2nd_count++);
975
976	/* Loop the rest. */
977	for (;;)
978	{
979	entry = entry->next;
980	if (!entry)
981	return strcache2_enter_string (cache, idx, str, length, hash);
982	if (strcache2_is_equal (cache, entry, str, length, hash))
983	return (const char *)(entry + 1);
984	MAKE_STATS (cache->collision_3rd_count++);
985	}
986	/* not reached */
987	}
988
989	/* The public lookup (case insensitive) string interface. */
990	const char *
991	strcache2_ilookup (struct strcache2 cache, const char str, unsigned int length)
992	{
993	struct strcache2_entry const *entry;
994	unsigned int hash = strcache2_case_insensitive_hash (str, length);
995	unsigned int idx;
996
997	assert (cache->case_insensitive);
998	assert (!memchr (str, '\0', length));
999
1000	MAKE_STATS (cache->lookup_count++);
1001
1002	/* Lookup the entry in the hash table, hoping for an
1003	early match. */
1004	idx = STRCACHE2_MOD_IT (cache, hash);
1005	entry = cache->hash_tab[idx];
1006	if (!entry)
1007	return NULL;
1008	if (strcache2_is_equal (cache, entry, str, length, hash))
1009	return (const char *)(entry + 1);
1010	MAKE_STATS (cache->collision_1st_count++);
1011
1012	entry = entry->next;
1013	if (!entry)
1014	return NULL;
1015	if (strcache2_is_equal (cache, entry, str, length, hash))
1016	return (const char *)(entry + 1);
1017	MAKE_STATS (cache->collision_2nd_count++);
1018
1019	/* Loop the rest. */
1020	for (;;)
1021	{
1022	entry = entry->next;
1023	if (!entry)
1024	return NULL;
1025	if (strcache2_is_equal (cache, entry, str, length, hash))
1026	return (const char *)(entry + 1);
1027	MAKE_STATS (cache->collision_3rd_count++);
1028	}
1029	/* not reached */
1030	}
1031
1032	#endif /* HAVE_CASE_INSENSITIVE_FS */
1033
1034	/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1035	int
1036	strcache2_is_cached (struct strcache2 cache, const char str)
1037	{
1038	/* Check mandatory alignment first. */
1039	if (!((size_t)str & (sizeof (void *) - 1)))
1040	{
1041	/* Check the segment list and consider the question answered if the
1042	string is within one of them. (Could check it more thoroughly...) */
1043	struct strcache2_seg const *seg;
1044	for (seg = cache->seg_head; seg; seg = seg->next)
1045	if ((size_t)(str - seg->start) < seg->size)
1046	return 1;
1047	}
1048
1049	return 0;
1050	}
1051
1052
1053	/* Verify the integrity of the specified string, returning 0 if OK. */
1054	int
1055	strcache2_verify_entry (struct strcache2 cache, const char str)
1056	{
1057	struct strcache2_entry const *entry;
1058	unsigned int hash;
1059	unsigned int length;
1060	const char *end;
1061
1062	entry = (struct strcache2_entry const *)str - 1;
1063	if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1064	{
1065	fprintf (stderr,
1066	"strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1067	cache->name, (void )entry, (void )str, str);
1068	return -1;
1069	}
1070
1071	end = memchr (str, '\0', entry->length + 1);
1072	length = end - str;
1073	if (length != entry->length)
1074	{
1075	fprintf (stderr,
1076	"strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1077	cache->name, (void *)entry, length, entry->length, str);
1078	return -1;
1079	}
1080
1081	hash = cache->case_insensitive
1082	? strcache2_case_insensitive_hash (str, entry->length)
1083	: strcache2_case_sensitive_hash (str, entry->length);
1084	if (hash != entry->hash)
1085	{
1086	fprintf (stderr,
1087	"strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1088	cache->name, (void *)entry, hash, entry->hash, str);
1089	return -1;
1090	}
1091
1092	return 0;
1093	}
1094
1095
1096	/* Calculates the case sensitive hash values of the string.
1097	The first hash is returned, the other is put at HASH2P. */
1098	unsigned int strcache2_hash_str (const char str, unsigned int length, unsigned int hash2p)
1099	{
1100	*hash2p = 1;
1101	return strcache2_case_sensitive_hash (str, length);
1102	}
1103
1104	/* Calculates the case insensitive hash values of the string.
1105	The first hash is returned, the other is put at HASH2P. */
1106	unsigned int strcache2_hash_istr (const char str, unsigned int length, unsigned int hash2p)
1107	{
1108	*hash2p = 1;
1109	return strcache2_case_insensitive_hash (str, length);
1110	}
1111
1112
1113
1114	/* Initalizes a new cache. */
1115	void
1116	strcache2_init (struct strcache2 cache, const char name, unsigned int size,
1117	unsigned int def_seg_size, int case_insensitive, int thread_safe)
1118	{
1119	unsigned hash_shift;
1120	assert (!thread_safe);
1121
1122	/* calc the size as a power of two */
1123	if (!size)
1124	hash_shift = STRCACHE2_HASH_SHIFT;
1125	else
1126	{
1127	assert (size <= (~0U / 2 + 1));
1128	for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1129	/* nothing */;
1130	}
1131
1132	/* adjust the default segment size */
1133	if (!def_seg_size)
1134	def_seg_size = STRCACHE2_SEG_SIZE;
1135	else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1136	def_seg_size = sizeof (struct strcache2_seg) * 10;
1137	else if ((def_seg_size & 0xfff) < 0xf00)
1138	def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1139
1140
1141	/* init the structure. */
1142	cache->case_insensitive = case_insensitive;
1143	#ifdef STRCACHE2_USE_MASK
1144	cache->hash_mask = (1U << hash_shift) - 1U;
1145	#else
1146	cache->hash_div = strcache2_find_prime(hash_shift);
1147	#endif
1148	cache->count = 0;
1149	cache->collision_count = 0;
1150	cache->lookup_count = 0;
1151	cache->collision_1st_count = 0;
1152	cache->collision_2nd_count = 0;
1153	cache->collision_3rd_count = 0;
1154	cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1155	cache->init_size = 1U << hash_shift;
1156	cache->hash_size = 1U << hash_shift;
1157	cache->def_seg_size = def_seg_size;
1158	cache->lock = NULL;
1159	cache->name = name;
1160
1161	/* allocate the hash table and first segment. */
1162	cache->hash_tab = (struct strcache2_entry **)
1163	xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1164	memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1165	strcache2_new_seg (cache, 0);
1166
1167	/* link it */
1168	cache->next = strcache_head;
1169	strcache_head = cache;
1170	}
1171
1172
1173	/* Terminates a string cache, freeing all memory and other
1174	associated resources. */
1175	void
1176	strcache2_term (struct strcache2 *cache)
1177	{
1178	/* unlink it */
1179	if (strcache_head == cache)
1180	strcache_head = cache->next;
1181	else
1182	{
1183	struct strcache2 *prev = strcache_head;
1184	while (prev->next != cache)
1185	prev = prev->next;
1186	assert (prev);
1187	prev->next = cache->next;
1188	}
1189
1190	/* free the memory segments */
1191	do
1192	{
1193	void *free_it = cache->seg_head;
1194	cache->seg_head = cache->seg_head->next;
1195	free (free_it);
1196	}
1197	while (cache->seg_head);
1198
1199	/* free the hash and clear the structure. */
1200	free (cache->hash_tab);
1201	memset (cache, '\0', sizeof (struct strcache2));
1202	}
1203
1204	/* Print statistics a string cache. */
1205	void
1206	strcache2_print_stats (struct strcache2 cache, const char prefix)
1207	{
1208	unsigned int seg_count = 0;
1209	unsigned long seg_total_bytes = 0;
1210	unsigned long seg_avg_bytes;
1211	unsigned long seg_avail_bytes = 0;
1212	unsigned long seg_max_bytes = 0;
1213	struct strcache2_seg *seg;
1214	unsigned int str_count = 0;
1215	unsigned long str_total_len = 0;
1216	unsigned int str_avg_len;
1217	unsigned int str_min_len = ~0U;
1218	unsigned int str_max_len = 0;
1219	unsigned int idx;
1220	unsigned int rehashes;
1221	unsigned int chain_depths[32];
1222
1223	printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1224
1225	/* Segment statistics. */
1226	for (seg = cache->seg_head; seg; seg = seg->next)
1227	{
1228	seg_count++;
1229	seg_total_bytes += seg->size;
1230	seg_avail_bytes += seg->avail;
1231	if (seg->size > seg_max_bytes)
1232	seg_max_bytes = seg->size;
1233	}
1234	seg_avg_bytes = seg_total_bytes / seg_count;
1235	printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1236	prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1237	cache->def_seg_size, seg_avail_bytes);
1238
1239	/* String statistics. */
1240	memset (chain_depths, '\0', sizeof (chain_depths));
1241	idx = cache->hash_size;
1242	while (idx-- > 0)
1243	{
1244	struct strcache2_entry const *entry = cache->hash_tab[idx];
1245	unsigned int depth = 0;
1246	for (; entry != 0; entry = entry->next, depth++)
1247	{
1248	unsigned int length = entry->length;
1249	str_total_len += length;
1250	if (length > str_max_len)
1251	str_max_len = length;
1252	if (length < str_min_len)
1253	str_min_len = length;
1254	str_count++;
1255	}
1256	chain_depths[depth >= 32 ? 31 : depth]++;
1257	}
1258	str_avg_len = cache->count ? str_total_len / cache->count : 0;
1259	printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1260	prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1261	if (str_count != cache->count)
1262	printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1263	cache->count, str_count);
1264
1265	/* Hash statistics. */
1266	idx = cache->init_size;
1267	rehashes = 0;
1268	while (idx < cache->hash_size)
1269	{
1270	idx *= 2;
1271	rehashes++;
1272	}
1273
1274	#ifdef STRCACHE2_USE_MASK
1275	printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1276	prefix, cache->hash_size, cache->hash_mask, rehashes);
1277	#else
1278	printf (_("%s hash size = %u div = %#x rehashed %u times"),
1279	prefix, cache->hash_size, cache->hash_div, rehashes);
1280	#endif
1281	if (cache->lookup_count)
1282	printf (_("%s lookups = %lu\n"
1283	"%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1284	prefix, cache->lookup_count,
1285	prefix,
1286	cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1287	cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1288	cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1289	printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1290	prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1291	printf (_("%s %5u (%u%%) empty hash table slots\n"),
1292	prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1293	printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1294	prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1295	for (idx = 2; idx < 32; idx++)
1296	{
1297	unsigned strs_at_this_depth = chain_depths[idx];
1298	unsigned i;
1299	for (i = idx + 1; i < 32; i++)
1300	strs_at_this_depth += chain_depths[i];
1301	if (strs_at_this_depth)
1302	printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1303	prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1304	idx - 1, idx == 2 ? " " : "s",
1305	strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1306	}
1307	}
1308
1309	/* Print statistics for all string caches. */
1310	void
1311	strcache2_print_stats_all (const char *prefix)
1312	{
1313	struct strcache2 *cur;
1314	for (cur = strcache_head; cur; cur = cur->next)
1315	strcache2_print_stats (cur, prefix);
1316	}
1317

Note: See TracBrowser for help on using the repository browser.

source: kBuild/trunk/src/kmk/strcache2.c@ 2657

Download in other formats: