/* $Id: cpu-numa.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
/** @file
 * numa - NUMA / memory benchmark.
 */
5 |
|
---|
/*
 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
 * in the VirtualBox distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
 */
36 |
|
---|
37 |
|
---|
38 | /*********************************************************************************************************************************
|
---|
39 | * Header Files *
|
---|
40 | *********************************************************************************************************************************/
|
---|
41 | #include <iprt/test.h>
|
---|
42 |
|
---|
43 | #include <iprt/asm.h>
|
---|
44 | //#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
|
---|
45 | //# include <iprt/asm-amd64-x86.h>
|
---|
46 | //#endif
|
---|
47 | #include <iprt/mem.h>
|
---|
48 | #include <iprt/mp.h>
|
---|
49 | #include <iprt/string.h>
|
---|
50 | #include <iprt/thread.h>
|
---|
51 | #include <iprt/time.h>
|
---|
52 |
|
---|
53 |
|
---|
/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
/** The number of threads to skip when testing.
 *
 * getNextCpu() uses this both as the stride between the CPU set indexes that
 * get tested and as the divisor when rounding the current index down to a
 * multiple of that stride, so it is expected to be non-zero.  Defaults to 1
 * and is overridden by main() when exactly one command line argument is
 * given. */
static uint32_t g_cThreadsToSkip = 1;
59 |
|
---|
60 | /**
|
---|
61 | * Gets the next online CPU.
|
---|
62 | *
|
---|
63 | * @returns Next CPU index or RTCPUSET_MAX_CPUS.
|
---|
64 | * @param iCurCpu The current CPU (index).
|
---|
65 | */
|
---|
66 | static int getNextCpu(unsigned iCurCpu)
|
---|
67 | {
|
---|
68 | /* Skip to the next chip. */
|
---|
69 | iCurCpu = (iCurCpu / g_cThreadsToSkip) * g_cThreadsToSkip;
|
---|
70 | iCurCpu += g_cThreadsToSkip;
|
---|
71 |
|
---|
72 | /* Skip offline cpus. */
|
---|
73 | while ( iCurCpu < RTCPUSET_MAX_CPUS
|
---|
74 | && !RTMpIsCpuOnline(iCurCpu) )
|
---|
75 | iCurCpu++;
|
---|
76 |
|
---|
77 | /* Make sure we're within bounds (in case of bad input). */
|
---|
78 | if (iCurCpu > RTCPUSET_MAX_CPUS)
|
---|
79 | iCurCpu = RTCPUSET_MAX_CPUS;
|
---|
80 | return iCurCpu;
|
---|
81 | }
|
---|
82 |
|
---|
83 |
|
---|
84 | static void doTest(RTTEST hTest)
|
---|
85 | {
|
---|
86 | NOREF(hTest);
|
---|
87 | uint32_t iAllocCpu = 0;
|
---|
88 | while (iAllocCpu < RTCPUSET_MAX_CPUS)
|
---|
89 | {
|
---|
90 | const uint32_t cbTestSet = _1M * 32;
|
---|
91 | const uint32_t cIterations = 384;
|
---|
92 |
|
---|
93 | /*
|
---|
94 | * Change CPU and allocate a chunk of memory.
|
---|
95 | */
|
---|
96 | RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAllocCpu)));
|
---|
97 |
|
---|
98 | void *pvTest = RTMemPageAlloc(cbTestSet); /* may be leaked, who cares */
|
---|
99 | RTTESTI_CHECK_RETV(pvTest != NULL);
|
---|
100 | memset(pvTest, 0xef, cbTestSet);
|
---|
101 |
|
---|
102 | /*
|
---|
103 | * Do the tests.
|
---|
104 | */
|
---|
105 | uint32_t iAccessCpu = 0;
|
---|
106 | while (iAccessCpu < RTCPUSET_MAX_CPUS)
|
---|
107 | {
|
---|
108 | RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAccessCpu)));
|
---|
109 |
|
---|
110 | /*
|
---|
111 | * The write test.
|
---|
112 | */
|
---|
113 | RTTimeNanoTS(); RTThreadYield();
|
---|
114 | uint64_t u64StartTS = RTTimeNanoTS();
|
---|
115 | for (uint32_t i = 0; i < cIterations; i++)
|
---|
116 | {
|
---|
117 | ASMCompilerBarrier(); /* paranoia */
|
---|
118 | memset(pvTest, i, cbTestSet);
|
---|
119 | }
|
---|
120 | uint64_t const cNsElapsedWrite = RTTimeNanoTS() - u64StartTS;
|
---|
121 | uint64_t cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
|
---|
122 | / ((long double)cNsElapsedWrite / RT_NS_1SEC_64) /* seconds */
|
---|
123 | / _1M /* MB */ );
|
---|
124 | RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-write", iAllocCpu, iAccessCpu);
|
---|
125 |
|
---|
126 | /*
|
---|
127 | * The read test.
|
---|
128 | */
|
---|
129 | memset(pvTest, 0, cbTestSet);
|
---|
130 | RTTimeNanoTS(); RTThreadYield();
|
---|
131 | u64StartTS = RTTimeNanoTS();
|
---|
132 | for (uint32_t i = 0; i < cIterations; i++)
|
---|
133 | {
|
---|
134 | #if 1
|
---|
135 | size_t u = 0;
|
---|
136 | size_t volatile *puCur = (size_t volatile *)pvTest;
|
---|
137 | size_t volatile *puEnd = puCur + cbTestSet / sizeof(size_t);
|
---|
138 | while (puCur != puEnd)
|
---|
139 | u += *puCur++;
|
---|
140 | #else
|
---|
141 | ASMCompilerBarrier(); /* paranoia */
|
---|
142 | void *pvFound = memchr(pvTest, (i & 127) + 1, cbTestSet);
|
---|
143 | RTTESTI_CHECK(pvFound == NULL);
|
---|
144 | #endif
|
---|
145 | }
|
---|
146 | uint64_t const cNsElapsedRead = RTTimeNanoTS() - u64StartTS;
|
---|
147 | cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
|
---|
148 | / ((long double)cNsElapsedRead / RT_NS_1SEC_64) /* seconds */
|
---|
149 | / _1M /* MB */ );
|
---|
150 | RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read", iAllocCpu, iAccessCpu);
|
---|
151 |
|
---|
152 | /*
|
---|
153 | * The read/write test.
|
---|
154 | */
|
---|
155 | RTTimeNanoTS(); RTThreadYield();
|
---|
156 | u64StartTS = RTTimeNanoTS();
|
---|
157 | for (uint32_t i = 0; i < cIterations; i++)
|
---|
158 | {
|
---|
159 | ASMCompilerBarrier(); /* paranoia */
|
---|
160 | memcpy(pvTest, (uint8_t *)pvTest + cbTestSet / 2, cbTestSet / 2);
|
---|
161 | }
|
---|
162 | uint64_t const cNsElapsedRW = RTTimeNanoTS() - u64StartTS;
|
---|
163 | cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
|
---|
164 | / ((long double)cNsElapsedRW / RT_NS_1SEC_64) /* seconds */
|
---|
165 | / _1M /* MB */ );
|
---|
166 | RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read-write", iAllocCpu, iAccessCpu);
|
---|
167 |
|
---|
168 | /*
|
---|
169 | * Total time.
|
---|
170 | */
|
---|
171 | RTTestIValueF(cNsElapsedRead + cNsElapsedWrite + cNsElapsedRW, RTTESTUNIT_NS,
|
---|
172 | "cpu%02u-mem%02u-time", iAllocCpu, iAccessCpu);
|
---|
173 |
|
---|
174 | /* advance */
|
---|
175 | iAccessCpu = getNextCpu(iAccessCpu);
|
---|
176 | }
|
---|
177 |
|
---|
178 | /*
|
---|
179 | * Clean up and advance to the next CPU.
|
---|
180 | */
|
---|
181 | RTMemPageFree(pvTest, cbTestSet);
|
---|
182 | iAllocCpu = getNextCpu(iAllocCpu);
|
---|
183 | }
|
---|
184 | }
|
---|
185 |
|
---|
186 |
|
---|
187 | int main(int argc, char **argv)
|
---|
188 | {
|
---|
189 | RTTEST hTest;
|
---|
190 | RTEXITCODE rcExit = RTTestInitAndCreate("numa-1", &hTest);
|
---|
191 | if (rcExit != RTEXITCODE_SUCCESS)
|
---|
192 | return rcExit;
|
---|
193 | RTTestBanner(hTest);
|
---|
194 |
|
---|
195 | #if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
|
---|
196 | /** @todo figure basic topology. */
|
---|
197 | #endif
|
---|
198 | if (argc == 2)
|
---|
199 | g_cThreadsToSkip = RTStrToUInt8(argv[1]);
|
---|
200 |
|
---|
201 | doTest(hTest);
|
---|
202 |
|
---|
203 | return RTTestSummaryAndDestroy(hTest);
|
---|
204 | }
|
---|
205 |
|
---|