Changeset 89150 in vbox for trunk/src/VBox/ValidationKit/bootsectors
- Timestamp:
- May 19, 2021 12:54:21 AM (4 years ago)
- Location:
- trunk/src/VBox/ValidationKit/bootsectors
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/ValidationKit/bootsectors/Makefile.kmk
r89142 r89150 397 397 bs3-timing-1-32.c32 398 398 399 PROGRAMS += bs3-timing-1-exe 400 bs3-timing-1-exe_TEMPLATE = VBoxR3Static 401 bs3-timing-1-exe_SOURCES = bs3-timing-1-exe.c 399 402 endif # VBOX_WITH_BS3KIT 400 403 -
trunk/src/VBox/ValidationKit/bootsectors/bs3-timing-1-32.c32
r89142 r89150 29 29 * Header Files * 30 30 *********************************************************************************************************************************/ 31 #include <bs3kit.h> 31 #ifndef STANDALONE_EXECUTABLE 32 # include <bs3kit.h> 33 #endif 32 34 #include <iprt/asm-amd64-x86.h> 33 35 #include <iprt/asm-math.h> 34 35 36 static void bs3Timing1_Tsc_One(void) 37 { 38 uint64_t const nsStart = Bs3TestNow(); 39 uint64_t const uTscStart = ASMReadTSC(); 40 uint64_t const nsDeadline = nsStart + RT_NS_10SEC; 41 uint64_t cNsElapsed; 42 uint64_t cTicksElapsed; 43 uint64_t cTicksMin = UINT64_MAX; 44 uint64_t cTicksMax = 0; 45 uint64_t cTicksSum = 0; 46 uint64_t cBelow512 = 0; 47 uint64_t cBelow1K = 0; 48 uint64_t cBelow4K = 0; 49 uint64_t cAbove512K = 0; 50 uint64_t cAbove1M = 0; 51 uint64_t cAbove10M = 0; 52 uint64_t cTotalLoops = 0; 36 #include <iprt/asm.h> 37 #include <iprt/uint128.h> 38 39 40 /********************************************************************************************************************************* 41 * Structures and Typedefs * 42 *********************************************************************************************************************************/ 43 /** TSC timing results. */ 44 typedef struct BS3TIMING1RESULT 45 { 46 /** Number of nanoseconds elapsed while testing. */ 47 uint64_t cNsElapsed; 48 /** Number of CPU ticks elapsed while testing. */ 49 uint64_t cTicksElapsed; 50 /** The minium number of ticks between TSC reads. */ 51 uint64_t cTicksMin; 52 /** The maximum number of ticks between TSC reads. */ 53 uint64_t cTicksMax; 54 /** The sum of all TSC read deltas. */ 55 uint64_t cTicksSum; 56 /** Number of loops (TSC read deltas). */ 57 uint64_t cTotalLoops; 58 /** Number of times TSC moved backwards. */ 59 uint64_t cBackwards; 60 /** Approx log2(cTicks) distribution. */ 61 uint64_t aDistribution[65]; 62 } BS3TIMING1RESULT; 63 64 65 /********************************************************************************************************************************* 66 * Global Variables * 67 *********************************************************************************************************************************/ 68 /** The total result. */ 69 static BS3TIMING1RESULT g_TotalResult; 70 71 /** Set if history wrapped (i.e. table is full). */ 72 static bool g_fBigHistoryWrapped = false; 73 /** The next history entry. */ 74 static uint32_t g_iBigHistory; 75 /** History of large gaps. */ 76 static struct { uint64_t uTsc, cTicksDelta; } g_aBigHistory[384]; 77 78 79 80 /** 81 * Pretty prints 82 */ 83 static void bs3Timing1_PrintTicks(uint64_t cTicks, uint64_t uCpuFreq) 84 { 85 if (uCpuFreq > _128M) 86 { 87 if (cTicks >= uCpuFreq * 1000) 88 Bs3TestPrintf("%'RU64s", cTicks / uCpuFreq); 89 else 90 { 91 const char *pszUnit; 92 uint64_t uValue; 93 if (cTicks >= uCpuFreq) 94 { 95 pszUnit = "s"; 96 uValue = (cTicks * RT_MS_1SEC) / uCpuFreq; 97 } 98 else if (cTicks * RT_MS_1SEC >= uCpuFreq) 99 { 100 pszUnit = "ms"; 101 uValue = (cTicks * RT_US_1SEC) / uCpuFreq; 102 } 103 else if (cTicks * RT_US_1SEC >= uCpuFreq) 104 { 105 pszUnit = "us"; 106 uValue = (cTicks * RT_NS_1SEC) / uCpuFreq; 107 } 108 else if (cTicks * RT_NS_1SEC >= uCpuFreq) 109 { 110 pszUnit = "ns"; 111 uValue = (cTicks * UINT64_C(1000000000000)) / uCpuFreq; 112 } 113 else 114 { 115 Bs3TestPrintf("%'RU64ps", (cTicks * UINT64_C(1000000000000)) / uCpuFreq); 116 return; 117 } 118 Bs3TestPrintf("%u.%03u%s (%'RU64 ticks)", (uint32_t)uValue / 1000, (uint32_t)uValue % 1000, pszUnit, cTicks); 119 } 120 } 121 else 122 Bs3TestPrintf("%'RU64 ticks", cTicks); 123 } 124 125 126 /** 127 * Prints a result. 128 * 129 * @param pResult The result to print. 130 * @param iRun The run (loop in qpc parlance). 131 * @param uVerbosity The verbosity level. 132 */ 133 static void bs3Timing1_PrintResult(BS3TIMING1RESULT const *pResult, unsigned iRun, unsigned uVerbosity) 134 { 135 uint64_t uCpuFreq; 136 137 /* 138 * Calc CPU frequency. 139 */ 140 if (pResult->cNsElapsed > 0 && pResult->cTicksElapsed > 0) 141 { 142 #if 1 143 RTUINT128U Tmp1, Divisor, Result; 144 RTUInt128Div(&Result, 145 RTUInt128MulU64ByU64(&Tmp1, pResult->cTicksElapsed, RT_NS_1SEC), 146 RTUInt128AssignU64(&Divisor, pResult->cNsElapsed)); 147 uCpuFreq = Result.s.Lo; 148 #else 149 unsigned const cShift = pResult->cTicksElapsed < UINT64_C(0x000225C17D04) ? 0 150 : pResult->cTicksElapsed < UINT64_C(0x00225C17D04D) ? 4 151 : pResult->cTicksElapsed < UINT64_C(0x0225C17D04DA) ? 8 152 : pResult->cTicksElapsed < UINT64_C(0x225C1D940BF6) ? 12 153 : 16; 154 uCpuFreq = pResult->cTicksElapsed * ((uint64_t)RT_NS_1SEC >> cShift) / (pResult->cNsElapsed >> cShift); 155 #endif 156 } 157 else 158 uCpuFreq = 1; 159 160 /* 161 * Report results. 162 * 163 * Note! in 32-bit and 16-bit mode, values 4G or higher gets formatted as 164 * hexadecimal to avoid 64-bit division. 165 */ 166 Bs3TestPrintf("Loop #%u: %'RU64 tests: ", iRun, pResult->cTotalLoops); 167 168 Bs3TestPrintf("average "); 169 bs3Timing1_PrintTicks(pResult->cTicksSum / pResult->cTotalLoops, uCpuFreq); 170 Bs3TestPrintf(", min "); 171 bs3Timing1_PrintTicks(pResult->cTicksMin, uCpuFreq); 172 Bs3TestPrintf(", max "); 173 bs3Timing1_PrintTicks(pResult->cTicksMax, uCpuFreq); 174 Bs3TestPrintf("\n"); 175 176 /* Distribution (tick delta log2-ish). */ 177 if (uVerbosity > 0) 178 { 179 unsigned iItem = 0; 180 unsigned i; 181 for (i = uVerbosity > 1 ? 0 : 5; i < RT_ELEMENTS(pResult->aDistribution); i++) 182 if (pResult->aDistribution[i] != 0) 183 { 184 if (iItem >= 6) 185 { 186 iItem = 0; 187 Bs3TestPrintf("\n"); 188 } 189 iItem++; 190 Bs3TestPrintf(" %'11RU64|2^%-2u", pResult->aDistribution[i], i); 191 } 192 if (uVerbosity > 1) 193 Bs3TestPrintf(iItem < 6 ? " (%'RU64 Hz)\n" : "\n (%'RU64 Hz)\n", uCpuFreq); 194 else 195 Bs3TestPrintf("\n"); 196 } 197 if (pResult->cBackwards != 0) 198 Bs3TestFailedF("TSC went backwards %'RU64 time(s)", pResult->cBackwards); 199 } 200 201 202 /** 203 * Do one TSC timing iteration. 204 * 205 * @param iRun The iteration number (loop). 206 * @param cSecs The number of seconds to sample TSCs. 207 * @param uVerbosity The noise level. 208 * @param iMinHistory The threshold level to put stuff in g_auTscHistory. 209 */ 210 static void bs3Timing1_Tsc_One(unsigned iRun, uint32_t cSecs, unsigned uVerbosity, unsigned iMinHistory) 211 { 212 uint64_t const nsStart = Bs3TestNow(); 213 uint64_t const uTscStart = ASMReadTSC(); 214 uint64_t const nsDeadline = nsStart + cSecs * RT_NS_1SEC_64; 215 uint64_t cNsElapsed; 216 BS3TIMING1RESULT Result; 217 unsigned i; 218 219 Bs3MemZero(&Result, sizeof(Result)); 220 Result.cTicksMin = UINT64_MAX; 53 221 54 222 /* … … 58 226 { 59 227 unsigned cLoops = 100000 + 1; 60 cTotalLoops += cLoops - 1;228 Result.cTotalLoops += cLoops - 1; 61 229 while (--cLoops != 0) 62 230 { … … 64 232 uint64_t uTscNow = ASMReadTSC(); 65 233 uint64_t cTicks = uTscNow - uTscPrev; 234 unsigned iBit; 235 236 /* check that it doesn't go backwards*/ 237 if ((int64_t)cTicks < 0) 238 Result.cBackwards++; 66 239 67 240 /* min/max/avg */ 68 cTicksSum += cTicks; 69 if (cTicks < cTicksMin) 70 cTicksMin = cTicks; 71 if (cTicks > cTicksMax) 72 cTicksMax = cTicks; 73 74 /* range. */ 75 if (cTicks < _4K) 76 { 77 cBelow4K++; 78 if (cTicks < _1K) 241 Result.cTicksSum += cTicks; 242 if (cTicks < Result.cTicksMin) 243 Result.cTicksMin = cTicks; 244 if (cTicks > Result.cTicksMax) 245 Result.cTicksMax = cTicks; 246 247 /* result distribution by most significant bit. */ 248 iBit = ASMBitLastSetU64(cTicks); 249 Result.aDistribution[iBit] += 1; 250 if (iBit < iMinHistory) 251 { /* likely */ } 252 else 253 { 254 g_aBigHistory[g_iBigHistory].uTsc = uTscPrev; 255 g_aBigHistory[g_iBigHistory].cTicksDelta = cTicks; 256 if (++g_iBigHistory >= RT_ELEMENTS(g_aBigHistory)) 79 257 { 80 cBelow1K++; 81 if (cTicks < 512) 82 cBelow512++; 83 } 84 } 85 else if (cTicks >= _512K) 86 { 87 cAbove512K++; 88 if (cTicks >= _1M) 89 { 90 cAbove1M++; 91 if (cTicks >= 10*_1M) 92 cAbove10M++; 258 g_iBigHistory = 0; 259 g_fBigHistoryWrapped = true; 93 260 } 94 261 } 95 262 } 96 263 } while ((cNsElapsed = Bs3TestNow()) < nsDeadline); 97 cTicksElapsed = ASMReadTSC(); 98 cTicksElapsed -= uTscStart; 99 cNsElapsed -= nsStart; 100 101 /* 102 * Report results. 103 * 104 * Note! in 32-bit and 16-bit mode, values 4G or higher gets formatted as 105 * hexadecimal to avoid 64-bit division. 106 */ 107 108 Bs3TestPrintf("%'14RU64 loops in %'RU64 ns / %'RU64 ticks\n", cTotalLoops, cNsElapsed, cTicksElapsed); 109 110 /* Report results in ticks. */ 111 Bs3TestPrintf("average %RU64 ticks, min %'RU64, max %'RU64\n", 112 cTicksSum / cTotalLoops, cTicksMin, cTicksMax); 113 Bs3TestPrintf("%'14RU64 below 512\n", cBelow512); 114 Bs3TestPrintf("%'14RU64 below 1K\n", cBelow1K); 115 Bs3TestPrintf("%'14RU64 below 4K\n", cBelow4K); 116 Bs3TestPrintf("%'14RU64 inbetween\n", cTotalLoops - cBelow4K - cAbove512K); 117 Bs3TestPrintf("%'14RU64 above 512K\n", cAbove512K); 118 Bs3TestPrintf("%'14RU64 above 1M\n", cAbove1M); 119 Bs3TestPrintf("%'14RU64 above 10M\n", cAbove10M); 120 121 /* results in nanoseconds */ 122 if (cNsElapsed && cTicksElapsed) 123 { 124 /* Missing ??U8M and the assembly bugger. */ 125 //uint64_t const uFreq = ASMMultU64ByU32DivByU32(cTicksElapsed, RT_NS_1SEC, cNsElapsed); 126 //uint64_t const uFreq = cTicksElapsed * (uint64_t)RT_NS_1SEC / cNsElapsed; 127 uint64_t const uFreq = cTicksElapsed / cNsElapsed; 128 Bs3TestPrintf("%RU64 Hz\n", uFreq); 264 265 Result.cTicksElapsed = ASMReadTSC() - uTscStart; 266 Result.cNsElapsed = cNsElapsed - nsStart; 267 268 bs3Timing1_PrintResult(&Result, iRun, uVerbosity); 269 270 /* Add to total. */ 271 g_TotalResult.cNsElapsed += Result.cNsElapsed; 272 g_TotalResult.cTicksElapsed += Result.cTicksElapsed; 273 if (Result.cTicksMin < g_TotalResult.cTicksMin || g_TotalResult.cTicksMin == 0) 274 g_TotalResult.cTicksMin = Result.cTicksMin; 275 if (Result.cTicksMax > g_TotalResult.cTicksMax) 276 g_TotalResult.cTicksMax += Result.cTicksMax; 277 g_TotalResult.cTicksSum += Result.cTicksSum; 278 g_TotalResult.cTotalLoops += Result.cTotalLoops; 279 g_TotalResult.cBackwards += Result.cBackwards; 280 for (i = 0; i < RT_ELEMENTS(Result.aDistribution); i++) 281 g_TotalResult.aDistribution[i] += Result.aDistribution[i]; 282 } 283 284 285 /** 286 * The TSC test driver. 287 * 288 * @param cLoops Number of test iterations. 289 * @param cSecs The number of seconds per iteration. 290 * @param uVerbosity How noisy we should be. 291 * @param iMinHistory The threshold for big gap history. 292 */ 293 static void bs3Timing1_Tsc_Driver(unsigned cLoops, unsigned cSecs, unsigned uVerbosity, unsigned iMinHistory) 294 { 295 unsigned iLoop; 296 297 #if 1 298 /* 299 * Verify that the first/last bit in U64 works (didn't). 300 */ 301 iLoop = ASMBitLastSetU64( UINT64_C(0x1000100010001000)); if (iLoop != 61) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop); 302 iLoop = ASMBitFirstSetU64(UINT64_C(0x1000100010001000)); if (iLoop != 13) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop); 303 iLoop = ASMBitLastSetU64( UINT64_C(0x000ffff000000000)); if (iLoop != 52) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop); 304 iLoop = ASMBitFirstSetU64(UINT64_C(0x000ffff000000000)); if (iLoop != 37) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop); 305 #endif 306 307 /* 308 * Do the work. 309 */ 310 Bs3TestPrintf("Running %u loops, %u second%s each...\n", cLoops, cSecs, cSecs != 1 ? "s" : ""); 311 for (iLoop = 1; iLoop <= cLoops; iLoop++) 312 bs3Timing1_Tsc_One(iLoop, cSecs, uVerbosity, iMinHistory); 313 314 /* 315 * Report the total. 316 */ 317 Bs3TestPrintf("Total:\n"); 318 bs3Timing1_PrintResult(&g_TotalResult, iLoop, uVerbosity + 1); 319 320 /* 321 * Dump the large gap history, if any. 322 */ 323 if (g_fBigHistoryWrapped || g_iBigHistory > 0) 324 { 325 uint32_t const iFirst = g_fBigHistoryWrapped ? g_iBigHistory : 0; 326 uint32_t const iEnd = g_iBigHistory; 327 uint64_t uTscPrev = g_aBigHistory[iFirst].uTsc; 328 uint32_t i = iFirst; 329 Bs3TestPrintf("Big gap history (TSC, prev delta, test delta|level):\n"); 330 do 331 { 332 Bs3TestPrintf(" %'RU64: %'14RU64 - %'14RU64|%u\n", g_aBigHistory[i].uTsc, g_aBigHistory[i].uTsc - uTscPrev, 333 g_aBigHistory[i].cTicksDelta, ASMBitLastSetU64(g_aBigHistory[i].cTicksDelta)); 334 uTscPrev = g_aBigHistory[i].uTsc; 335 if (++i >= RT_ELEMENTS(g_aBigHistory)) 336 i = 0; 337 } while (i != iEnd); 129 338 } 130 } 131 339 else 340 Bs3TestPrintf("No big gap history.\n"); 341 } 342 343 344 #ifndef STANDALONE_EXECUTABLE 132 345 BS3_DECL(void) bs3Timing1_Tsc_pe32(void) 133 346 { 134 347 Bs3TestPrintf("bs3Timing1_Tsc_pe32\n"); 135 bs3Timing1_Tsc_One(); 136 } 348 bs3Timing1_Tsc_Driver(5, 10 /*sec*/, 1 /*uVerbosity*/, 18); 349 } 350 #endif 137 351 138 352 /* P.S. don't forget: VBoxManage setextradata bs3-timing-1 VBoxInternal/Devices/VMMDev/0/Config/TestingEnabled 1 */
Note:
See TracChangeset
for help on using the changeset viewer.