VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/alloc/memcache.cpp@ 55747

Last change on this file since 55747 was 55256, checked in by vboxsync, 10 years ago

memcache: alloc tuning.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.0 KB
Line 
1/* $Id: memcache.cpp 55256 2015-04-14 15:46:47Z vboxsync $ */
2/** @file
3 * IPRT - Memory Object Allocation Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/memcache.h>
32#include "internal/iprt.h"
33
34#include <iprt/assert.h>
35#include <iprt/asm.h>
36#include <iprt/critsect.h>
37#include <iprt/err.h>
38#include <iprt/mem.h>
39#include <iprt/param.h>
40
41#include "internal/magics.h"
42
43
44/*******************************************************************************
45* Structures and Typedefs *
46*******************************************************************************/
/** Pointer to the internal cache instance data (struct RTMEMCACHEINT). */
typedef struct RTMEMCACHEINT  *PRTMEMCACHEINT;
/** Pointer to a cache page header (struct RTMEMCACHEPAGE). */
typedef struct RTMEMCACHEPAGE *PRTMEMCACHEPAGE;
51
52
53
/**
 * Link node laid over an object while it sits on the cache's free stack.
 *
 * @remarks The storage of the freed object itself is reused for the link, so
 *          this only works for objects that have neither a constructor nor a
 *          destructor and that are at least as big as this structure.
 */
typedef struct RTMEMCACHEFREEOBJ
{
    /** The next free object on the stack, NULL if this is the last one. */
    struct RTMEMCACHEFREEOBJ * volatile pNext;
} RTMEMCACHEFREEOBJ;
/** Pointer to a free object link node. */
typedef RTMEMCACHEFREEOBJ *PRTMEMCACHEFREEOBJ;
67
68
69/**
70 * A cache page.
71 *
72 * This is a page of memory that we split up in to a bunch object sized chunks
73 * and hand out to the cache users. The bitmap is updated in an atomic fashion
74 * so that we don't have to take any locks when freeing or allocating memory.
75 */
76typedef struct RTMEMCACHEPAGE
77{
78 /** Pointer to the cache owning this page.
79 * This is used for validation purposes only. */
80 PRTMEMCACHEINT pCache;
81 /** Pointer to the next page.
82 * This is marked as volatile since we'll be adding new entries to the list
83 * without taking any locks. */
84 PRTMEMCACHEPAGE volatile pNext;
85 /** Bitmap tracking allocated blocks. */
86 void volatile *pbmAlloc;
87 /** Bitmap tracking which blocks that has been thru the constructor. */
88 void volatile *pbmCtor;
89 /** Pointer to the object array. */
90 uint8_t *pbObjects;
91 /** The number of objects on this page. */
92 uint32_t cObjects;
93
94 /** Padding to force cFree into the next cache line. (ASSUMES CL = 64) */
95 uint8_t abPadding[ARCH_BITS == 32 ? 64 - 6*4 : 64 - 5*8 - 4];
96 /** The number of free objects. */
97 int32_t volatile cFree;
98} RTMEMCACHEPAGE;
99AssertCompileMemberOffset(RTMEMCACHEPAGE, cFree, 64);
100
101
102/**
103 * Memory object cache instance.
104 */
105typedef struct RTMEMCACHEINT
106{
107 /** Magic value (RTMEMCACHE_MAGIC). */
108 uint32_t u32Magic;
109 /** The object size. */
110 uint32_t cbObject;
111 /** Object alignment. */
112 uint32_t cbAlignment;
113 /** The per page object count. */
114 uint32_t cPerPage;
115 /** Number of bits in the bitmap.
116 * @remarks This is higher or equal to cPerPage and it is aligned such that
117 * the search operation will be most efficient on x86/AMD64. */
118 uint32_t cBits;
119 /** The maximum number of objects. */
120 uint32_t cMax;
121 /** Whether to the use the free list or not. */
122 bool fUseFreeList;
123 /** Head of the page list. */
124 PRTMEMCACHEPAGE pPageHead;
125 /** Poiner to the insertion point in the page list. */
126 PRTMEMCACHEPAGE volatile *ppPageNext;
127 /** Constructor callback. */
128 PFNMEMCACHECTOR pfnCtor;
129 /** Destructor callback. */
130 PFNMEMCACHEDTOR pfnDtor;
131 /** Callback argument. */
132 void *pvUser;
133 /** Critical section serializing page allocation and similar. */
134 RTCRITSECT CritSect;
135
136 /** The total object count. */
137 uint32_t volatile cTotal;
138 /** The number of free objects. */
139 int32_t volatile cFree;
140 /** This may point to a page with free entries. */
141 PRTMEMCACHEPAGE volatile pPageHint;
142 /** Stack of free items.
143 * These are marked as used in the allocation bitmaps.
144 *
145 * @todo This doesn't scale well when several threads are beating on the
146 * cache. Also, it totally doesn't work when the objects are too
147 * small. */
148 PRTMEMCACHEFREEOBJ volatile pFreeTop;
149} RTMEMCACHEINT;
150
151
152/*******************************************************************************
153* Internal Functions *
154*******************************************************************************/
155static void rtMemCacheFreeList(RTMEMCACHEINT *pThis, PRTMEMCACHEFREEOBJ pHead);
156
157
158RTDECL(int) RTMemCacheCreate(PRTMEMCACHE phMemCache, size_t cbObject, size_t cbAlignment, uint32_t cMaxObjects,
159 PFNMEMCACHECTOR pfnCtor, PFNMEMCACHEDTOR pfnDtor, void *pvUser, uint32_t fFlags)
160
161{
162 AssertPtr(phMemCache);
163 AssertPtrNull(pfnCtor);
164 AssertPtrNull(pfnDtor);
165 AssertReturn(!pfnDtor || pfnCtor, VERR_INVALID_PARAMETER);
166 AssertReturn(cbObject > 0, VERR_INVALID_PARAMETER);
167 AssertReturn(cbObject <= PAGE_SIZE / 8, VERR_INVALID_PARAMETER);
168 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
169
170 if (cbAlignment == 0)
171 {
172 if (cbObject <= 2)
173 cbAlignment = cbObject;
174 else if (cbObject <= 4)
175 cbAlignment = 4;
176 else if (cbObject <= 8)
177 cbAlignment = 8;
178 else if (cbObject <= 16)
179 cbAlignment = 16;
180 else if (cbObject <= 32)
181 cbAlignment = 32;
182 else
183 cbAlignment = 64;
184 }
185 else
186 {
187 AssertReturn(!((cbAlignment - 1) & cbAlignment), VERR_NOT_POWER_OF_TWO);
188 AssertReturn(cbAlignment <= 64, VERR_OUT_OF_RANGE);
189 }
190
191 /*
192 * Allocate and initialize the instance memory.
193 */
194 RTMEMCACHEINT *pThis = (RTMEMCACHEINT *)RTMemAlloc(sizeof(*pThis));
195 if (!pThis)
196 return VERR_NO_MEMORY;
197 int rc = RTCritSectInit(&pThis->CritSect);
198 if (RT_FAILURE(rc))
199 {
200 RTMemFree(pThis);
201 return rc;
202 }
203
204 pThis->u32Magic = RTMEMCACHE_MAGIC;
205 pThis->cbObject = (uint32_t)RT_ALIGN_Z(cbObject, cbAlignment);
206 pThis->cbAlignment = (uint32_t)cbAlignment;
207 pThis->cPerPage = (uint32_t)((PAGE_SIZE - RT_ALIGN_Z(sizeof(RTMEMCACHEPAGE), cbAlignment)) / pThis->cbObject);
208 while ( RT_ALIGN_Z(sizeof(RTMEMCACHEPAGE), 8)
209 + pThis->cPerPage * pThis->cbObject
210 + RT_ALIGN(pThis->cPerPage, 64) / 8 * 2
211 > PAGE_SIZE)
212 pThis->cPerPage--;
213 pThis->cBits = RT_ALIGN(pThis->cPerPage, 64);
214 pThis->cMax = cMaxObjects;
215 pThis->fUseFreeList = cbObject >= sizeof(RTMEMCACHEFREEOBJ)
216 && !pfnCtor
217 && !pfnDtor;
218 pThis->pPageHead = NULL;
219 pThis->ppPageNext = &pThis->pPageHead;
220 pThis->pfnCtor = pfnCtor;
221 pThis->pfnDtor = pfnDtor;
222 pThis->pvUser = pvUser;
223 pThis->cTotal = 0;
224 pThis->cFree = 0;
225 pThis->pPageHint = NULL;
226 pThis->pFreeTop = NULL;
227
228 *phMemCache = pThis;
229 return VINF_SUCCESS;
230}
231
232
233RTDECL(int) RTMemCacheDestroy(RTMEMCACHE hMemCache)
234{
235 RTMEMCACHEINT *pThis = hMemCache;
236 if (!pThis)
237 return VINF_SUCCESS;
238 AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
239 AssertReturn(pThis->u32Magic == RTMEMCACHE_MAGIC, VERR_INVALID_HANDLE);
240
241#if 0 /*def RT_STRICT - don't require eveything to be freed. Caches are very convenient for lazy cleanup. */
242 uint32_t cFree = pThis->cFree;
243 for (PRTMEMCACHEFREEOBJ pFree = pThis->pFreeTop; pFree && cFree < pThis->cTotal + 5; pFree = pFree->pNext)
244 cFree++;
245 AssertMsg(cFree == pThis->cTotal, ("cFree=%u cTotal=%u\n", cFree, pThis->cTotal));
246#endif
247
248 /*
249 * Destroy it.
250 */
251 AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, RTMEMCACHE_MAGIC_DEAD, RTMEMCACHE_MAGIC), VERR_INVALID_HANDLE);
252 RTCritSectDelete(&pThis->CritSect);
253
254 while (pThis->pPageHead)
255 {
256 PRTMEMCACHEPAGE pPage = pThis->pPageHead;
257 pThis->pPageHead = pPage->pNext;
258 pPage->cFree = 0;
259
260 if (pThis->pfnDtor)
261 {
262 uint32_t iObj = pPage->cObjects;
263 while (iObj-- > 0)
264 if (ASMBitTestAndClear(pPage->pbmCtor, iObj))
265 pThis->pfnDtor(hMemCache, pPage->pbObjects + iObj * pThis->cbObject, pThis->pvUser);
266 }
267
268 RTMemPageFree(pPage, PAGE_SIZE);
269 }
270
271 RTMemFree(pThis);
272 return VINF_SUCCESS;
273}
274
275
276/**
277 * Grows the cache.
278 *
279 * @returns IPRT status code.
280 * @param pThis The memory cache instance.
281 */
282static int rtMemCacheGrow(RTMEMCACHEINT *pThis)
283{
284 /*
285 * Enter the critical section here to avoid allocation races leading to
286 * wasted memory (++) and make it easier to link in the new page.
287 */
288 RTCritSectEnter(&pThis->CritSect);
289 int rc = VINF_SUCCESS;
290 if (pThis->cFree < 0)
291 {
292 /*
293 * Allocate and initialize the new page.
294 *
295 * We put the constructor bitmap at the lower end right after cFree.
296 * We then push the object array to the end of the page and place the
297 * allocation bitmap below it. The hope is to increase the chance that
298 * the allocation bitmap is in a different cache line than cFree since
299 * this increases performance markably when lots of threads are beating
300 * on the cache.
301 */
302 PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)RTMemPageAlloc(PAGE_SIZE);
303 if (pPage)
304 {
305 uint32_t const cObjects = RT_MIN(pThis->cPerPage, pThis->cMax - pThis->cTotal);
306
307 ASMMemZeroPage(pPage);
308 pPage->pCache = pThis;
309 pPage->pNext = NULL;
310 pPage->cFree = cObjects;
311 pPage->cObjects = cObjects;
312 uint8_t *pb = (uint8_t *)(pPage + 1);
313 pb = RT_ALIGN_PT(pb, 8, uint8_t *);
314 pPage->pbmCtor = pb;
315 pb = (uint8_t *)pPage + PAGE_SIZE - pThis->cbObject * cObjects;
316 pPage->pbObjects = pb; Assert(RT_ALIGN_P(pb, pThis->cbAlignment) == pb);
317 pb -= pThis->cBits / 8;
318 pb = (uint8_t *)((uintptr_t)pb & ~(uintptr_t)7);
319 pPage->pbmAlloc = pb;
320 Assert((uintptr_t)pPage->pbmCtor + pThis->cBits / 8 <= (uintptr_t)pPage->pbmAlloc);
321
322 /* Mark the bitmap padding and any unused objects as allocated. */
323 for (uint32_t iBit = cObjects; iBit < pThis->cBits; iBit++)
324 ASMBitSet(pPage->pbmAlloc, iBit);
325
326 /* Make it the hint. */
327 ASMAtomicWritePtr(&pThis->pPageHint, pPage);
328
329 /* Link the page in at the end of the list. */
330 ASMAtomicWritePtr(pThis->ppPageNext, pPage);
331 pThis->ppPageNext = &pPage->pNext;
332
333 /* Add it to the page counts. */
334 ASMAtomicAddS32(&pThis->cFree, cObjects);
335 ASMAtomicAddU32(&pThis->cTotal, cObjects);
336 }
337 else
338 rc = VERR_NO_MEMORY;
339 }
340 RTCritSectLeave(&pThis->CritSect);
341 return rc;
342}
343
344
345/**
346 * Grabs a an object in a page.
347 * @returns New cFree value on success (0 or higher), -1 on failure.
348 * @param pPage Pointer to the page.
349 */
350DECL_FORCE_INLINE(int32_t) rtMemCacheGrabObj(PRTMEMCACHEPAGE pPage)
351{
352 if (ASMAtomicUoReadS32(&pPage->cFree) > 0)
353 {
354 int32_t cFreeNew = ASMAtomicDecS32(&pPage->cFree);
355 if (cFreeNew >= 0)
356 return cFreeNew;
357 ASMAtomicIncS32(&pPage->cFree);
358 }
359 return -1;
360}
361
362
363RTDECL(int) RTMemCacheAllocEx(RTMEMCACHE hMemCache, void **ppvObj)
364{
365 RTMEMCACHEINT *pThis = hMemCache;
366 AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
367 AssertReturn(pThis->u32Magic == RTMEMCACHE_MAGIC, VERR_INVALID_PARAMETER);
368
369 /*
370 * Try grab a free object from the stack.
371 */
372 PRTMEMCACHEFREEOBJ pObj = ASMAtomicUoReadPtrT(&pThis->pFreeTop, PRTMEMCACHEFREEOBJ);
373 if (pObj)
374 {
375 pObj = ASMAtomicXchgPtrT(&pThis->pFreeTop, NULL, PRTMEMCACHEFREEOBJ);
376 if (pObj)
377 {
378 if (pObj->pNext)
379 {
380 Assert(pObj->pNext != pObj);
381 PRTMEMCACHEFREEOBJ pAllocRace = ASMAtomicXchgPtrT(&pThis->pFreeTop, pObj->pNext, PRTMEMCACHEFREEOBJ);
382 if (pAllocRace)
383 rtMemCacheFreeList(pThis, pAllocRace);
384 }
385
386 pObj->pNext = NULL;
387 *ppvObj = pObj;
388 return VINF_SUCCESS;
389 }
390 }
391
392 /*
393 * Try grab a free object at the cache level.
394 */
395 int32_t cNewFree = ASMAtomicDecS32(&pThis->cFree);
396 if (RT_LIKELY(cNewFree < 0))
397 {
398 uint32_t cTotal = ASMAtomicUoReadU32(&pThis->cTotal);
399 if ( (uint32_t)(cTotal + -cNewFree) > pThis->cMax
400 || (uint32_t)(cTotal + -cNewFree) <= cTotal)
401 {
402 ASMAtomicIncS32(&pThis->cFree);
403 return VERR_MEM_CACHE_MAX_SIZE;
404 }
405
406 int rc = rtMemCacheGrow(pThis);
407 if (RT_FAILURE(rc))
408 {
409 ASMAtomicIncS32(&pThis->cFree);
410 return rc;
411 }
412 }
413
414 /*
415 * Grab a free object at the page level.
416 */
417 PRTMEMCACHEPAGE pPage = ASMAtomicUoReadPtrT(&pThis->pPageHint, PRTMEMCACHEPAGE);
418 int32_t iObj = pPage ? rtMemCacheGrabObj(pPage) : -1;
419 if (iObj < 0)
420 {
421 for (unsigned cLoops = 0; ; cLoops++)
422 {
423 for (pPage = pThis->pPageHead; pPage; pPage = pPage->pNext)
424 {
425 iObj = rtMemCacheGrabObj(pPage);
426 if (iObj >= 0)
427 {
428 if (iObj > 0)
429 ASMAtomicWritePtr(&pThis->pPageHint, pPage);
430 break;
431 }
432 }
433 if (iObj >= 0)
434 break;
435 Assert(cLoops != 2);
436 Assert(cLoops < 10);
437 }
438 }
439 Assert(iObj >= 0);
440 Assert((uint32_t)iObj < pThis->cMax);
441
442 /*
443 * Find a free object in the allocation bitmap. Use the new cFree count
444 * as a hint.
445 */
446 if (ASMAtomicBitTestAndSet(pPage->pbmAlloc, iObj))
447 {
448 for (unsigned cLoops2 = 0;; cLoops2++)
449 {
450 iObj = ASMBitFirstClear(pPage->pbmAlloc, pThis->cBits);
451 if (RT_LIKELY(iObj >= 0))
452 {
453 if (!ASMAtomicBitTestAndSet(pPage->pbmAlloc, iObj))
454 break;
455 }
456 else
457 ASMMemoryFence();
458 Assert(cLoops2 != 40);
459 }
460 Assert(iObj >= 0);
461 }
462 void *pvObj = &pPage->pbObjects[iObj * pThis->cbObject];
463 Assert((uintptr_t)pvObj - (uintptr_t)pPage < PAGE_SIZE);
464
465 /*
466 * Call the constructor?
467 */
468 if ( pThis->pfnCtor
469 && !ASMAtomicBitTestAndSet(pPage->pbmCtor, iObj))
470 {
471 int rc = pThis->pfnCtor(hMemCache, pvObj, pThis->pvUser);
472 if (RT_FAILURE(rc))
473 {
474 ASMAtomicBitClear(pPage->pbmCtor, iObj);
475 RTMemCacheFree(pThis, pvObj);
476 return rc;
477 }
478 }
479
480 *ppvObj = pvObj;
481 return VINF_SUCCESS;
482}
483
484
485RTDECL(void *) RTMemCacheAlloc(RTMEMCACHE hMemCache)
486{
487 void *pvObj;
488 int rc = RTMemCacheAllocEx(hMemCache, &pvObj);
489 if (RT_SUCCESS(rc))
490 return pvObj;
491 return NULL;
492}
493
494
495
496/**
497 * Really frees one object.
498 *
499 * @param pThis The memory cache.
500 * @param pvObj The memory object to free.
501 */
502static void rtMemCacheFreeOne(RTMEMCACHEINT *pThis, void *pvObj)
503{
504 /* Note: Do *NOT* attempt to poison the object! */
505
506 /*
507 * Find the cache page. The page structure is at the start of the page.
508 */
509 PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)(((uintptr_t)pvObj) & ~(uintptr_t)PAGE_OFFSET_MASK);
510 Assert(pPage->pCache == pThis);
511 Assert(ASMAtomicUoReadS32(&pPage->cFree) < (int32_t)pThis->cPerPage);
512
513 /*
514 * Clear the bitmap bit and update the two object counter. Order matters!
515 */
516 uintptr_t offObj = (uintptr_t)pvObj - (uintptr_t)pPage->pbObjects;
517 uintptr_t iObj = offObj / pThis->cbObject;
518 Assert(iObj * pThis->cbObject == offObj);
519 Assert(iObj < pThis->cPerPage);
520 AssertReturnVoid(ASMAtomicBitTestAndClear(pPage->pbmAlloc, iObj));
521
522 ASMAtomicIncS32(&pPage->cFree);
523 ASMAtomicIncS32(&pThis->cFree);
524}
525
526
527/**
528 * Really frees a list of 'freed' object.
529 *
530 * @param pThis The memory cache.
531 * @param pHead The head of the list.
532 */
533static void rtMemCacheFreeList(RTMEMCACHEINT *pThis, PRTMEMCACHEFREEOBJ pHead)
534{
535 while (pHead)
536 {
537 PRTMEMCACHEFREEOBJ pFreeMe = pHead;
538 pHead = pHead->pNext;
539 pFreeMe->pNext = NULL;
540 ASMCompilerBarrier();
541 rtMemCacheFreeOne(pThis, pFreeMe);
542 }
543}
544
545
546
547RTDECL(void) RTMemCacheFree(RTMEMCACHE hMemCache, void *pvObj)
548{
549 if (!pvObj)
550 return;
551
552 RTMEMCACHEINT *pThis = hMemCache;
553 AssertPtrReturnVoid(pThis);
554 AssertReturnVoid(pThis->u32Magic == RTMEMCACHE_MAGIC);
555
556 AssertPtr(pvObj);
557 Assert(RT_ALIGN_P(pvObj, pThis->cbAlignment) == pvObj);
558
559 if (!pThis->fUseFreeList)
560 rtMemCacheFreeOne(pThis, pvObj);
561 else
562 {
563# ifdef RT_STRICT
564 /* This is the same as the other branch, except it's not actually freed. */
565 PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)(((uintptr_t)pvObj) & ~(uintptr_t)PAGE_OFFSET_MASK);
566 Assert(pPage->pCache == pThis);
567 Assert(ASMAtomicUoReadS32(&pPage->cFree) < (int32_t)pThis->cPerPage);
568 uintptr_t offObj = (uintptr_t)pvObj - (uintptr_t)pPage->pbObjects;
569 uintptr_t iObj = offObj / pThis->cbObject;
570 Assert(iObj * pThis->cbObject == offObj);
571 Assert(iObj < pThis->cPerPage);
572 AssertReturnVoid(ASMBitTest(pPage->pbmAlloc, (int32_t)iObj));
573# endif
574
575 /*
576 * Push it onto the free stack.
577 */
578 PRTMEMCACHEFREEOBJ pObj = (PRTMEMCACHEFREEOBJ)pvObj;
579 pObj->pNext = ASMAtomicXchgPtrT(&pThis->pFreeTop, NULL, PRTMEMCACHEFREEOBJ);
580 PRTMEMCACHEFREEOBJ pFreeRace = ASMAtomicXchgPtrT(&pThis->pFreeTop, pObj, PRTMEMCACHEFREEOBJ);
581 if (pFreeRace)
582 rtMemCacheFreeList(pThis, pFreeRace);
583 }
584}
585
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette