Changeset 108868 in vbox for trunk/src/VBox/VMM
- Timestamp: Apr 7, 2025 10:52:07 AM (5 weeks ago)
- svn:sync-xref-src-repo-rev: 168318
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/target-armv8/bsd-spec-analyze.py
r108866 r108868 1128 1128 DecoderNode.s_uLogLine += 1; 1129 1129 1130 def constructNextLevel(self, uDepth, uMaxCost): # pylint: disable=too-many-locals 1130 def constructNextLevel(self, uDepth, uMaxCost): # pylint: disable=too-many-locals,too-many-statements 1131 1131 """ 1132 1132 Recursively constructs the … … 1206 1206 cMinTableSizeInBits -= 1; 1207 1207 1208 if uDepth <= 7:1208 if uDepth <= 2: 1209 1209 self.dprint(uDepth, 1210 1210 '%s Start/%u: %#010x (%u) - %u/%u instructions - tab size %u-%u; fChecked=%#x/%#x uCostBest=%#x' … … 1216 1216 if cOccurences >= 2 and fOrgMask > 0 and fOrgMask != 0xffffffff and (fOrgMask & fMaskNotDoneYet) != 0: 1217 1217 # 1218 # Brute force relevant mask variations. 1219 # (The MaskIterator skips masks that are too wide, too fragmented or already covered.) 1218 # Step 1: Brute force relevant mask variations and pick a few masks. 1220 1219 # 1220 # The MaskIterator skips masks that are too wide, too fragmented or 1221 # already covered. 1222 # 1223 # The cost calculation is mainly based on distribution vs table size, 1224 # trying to favor masks with more target slots. 1225 # 1226 dCandidates = {}; 1221 1227 for fMask, cMaskBits, aaiMaskToIdxAlgo in MaskIterator(fOrgMask, cMinTableSizeInBits, cMaxTableSizeInBits, 1222 1228 fMaskNotDoneYet): 1223 if uDepth <= 7:1224 self.dprint(uDepth, '>>> fMask=%#010x cMaskBits=%s aaiMaskToIdxAlgo=%s)...'1225 % (fMask, cMaskBits, aaiMaskToIdxAlgo));1229 #if uDepth <= 2: 1230 # self.dprint(uDepth, '1>> fMask=%#010x cMaskBits=%s aaiMaskToIdxAlgo=%s...' 1231 # % (fMask, cMaskBits, aaiMaskToIdxAlgo)); 1226 1232 #assert cMaskBits <= cMaxTableSizeInBits; 1227 1233 … … 1231 1237 #uCostTmp <<= uDepth; # Make the cost exponentially higher with depth. (?) 1232 1238 if uCostTmp >= uCostBest: 1233 if uDepth <= 7:1234 self.dprint(uDepth, '!!! %#010x too expensive #1: %#x vs %#x' % (fMask, uCostTmp, uCostBest));1239 #if uDepth <= 2: 1240 # self.dprint(uDepth, '!!! 
%#010x too expensive #1: %#x vs %#x' % (fMask, uCostTmp, uCostBest)); 1235 1241 continue; 1236 1242 1237 1243 # Compile the indexing/unindexing functions. 1238 1244 fnToIndex = MaskZipper.algoToZipLambda(aaiMaskToIdxAlgo, fMask, fCompileMaskZipUnzip); 1239 fnFromIndex = MaskZipper.algoToUnzipLambda(aaiMaskToIdxAlgo, fMask, fCompileMaskZipUnzip);1240 1245 1241 1246 # Insert the instructions into the temporary table. … … 1243 1248 for oInstr in self.aoInstructions: 1244 1249 idx = fnToIndex(oInstr.fFixedValue, aaiMaskToIdxAlgo); 1245 #assert idx == MaskZipper.zipMask(oInstr.fFixedValue & fMask, aaiMaskToIdxAlgo);1246 #assert idx == fnToIndex(fnFromIndex(idx, aaiMaskToIdxAlgo), aaiMaskToIdxAlgo);1247 #assert idx == MaskZipper.zipMask(MaskZipper.unzipMask(idx, aaiMaskToIdxAlgo), aaiMaskToIdxAlgo);1248 1250 #self.dprint(uDepth, '%#010x -> %#05x %s' % (oInstr.fFixedValue, idx, oInstr.sName)); 1249 1251 daoTmp[idx].append(oInstr); … … 1251 1253 # Reject anything that ends up putting all the stuff in a single slot. 1252 1254 if len(daoTmp) <= 1: 1253 if uDepth <= 7: self.dprint(uDepth, '!!! bad distribution #1: fMask=%#x' % (fMask,));1255 #if uDepth <= 2: self.dprint(uDepth, '!!! bad distribution #1: fMask=%#x' % (fMask,)); 1254 1256 continue; 1255 1257 … … 1259 1261 uCostTmp += int(rdAvgLen * 8) 1260 1262 if uCostTmp >= uCostBest: 1261 if uDepth <= 7:1262 self.dprint(uDepth, '!!! %#010x too expensive #2: %#x vs %#x (rdAvgLen=%s)'1263 % (fMask, uCostTmp, uCostBest, rdAvgLen));1263 #if uDepth <= 2: 1264 # self.dprint(uDepth, '!!! %#010x too expensive #2: %#x vs %#x (rdAvgLen=%s)' 1265 # % (fMask, uCostTmp, uCostBest, rdAvgLen)); 1264 1266 continue; 1265 1267 … … 1269 1271 uCostTmp += ((cNominalFill - len(daoTmp)) * 2) #<< uDepth; # 2 = kCostUnusedTabEntry 1270 1272 if uCostTmp >= uCostBest: 1271 if uDepth <= 7:1272 self.dprint(uDepth, '!!! %#010x too expensive #3: %#x vs %#x' % (fMask, uCostTmp, uCostBest));1273 #if uDepth <= 2: 1274 # self.dprint(uDepth, '!!! 
%#010x too expensive #3: %#x vs %#x' % (fMask, uCostTmp, uCostBest)); 1273 1275 continue; 1274 1276 1277 # Record it as a candidate. 1278 dCandidates[uCostTmp] = (fMask, cMaskBits, aaiMaskToIdxAlgo, daoTmp); 1279 if len(dCandidates) > 64: 1280 dOld = dCandidates; 1281 dCandidates = { uKey:dOld[uKey] for uKey in sorted(dCandidates.keys())[:4] }; 1282 del dOld; 1283 1284 # 1285 # Step 2: Process the top 4 candidates. 1286 # 1287 for uCostTmp in sorted(dCandidates.keys())[:4]: 1288 fMask, cMaskBits, aaiMaskToIdxAlgo, daoTmp = dCandidates[uCostTmp]; 1289 1290 #if uDepth <= 2: 1291 # self.dprint(uDepth, '2>> fMask=%#010x cMaskBits=%s aaiMaskToIdxAlgo=%s #daoTmp=%s...' 1292 # % (fMask, cMaskBits, aaiMaskToIdxAlgo, len(daoTmp),)); 1293 #assert cMaskBits <= cMaxTableSizeInBits; 1294 1275 1295 # Construct decoder nodes from the aaoTmp lists, construct sub-levels and calculate costs. 1296 fnFromIndex = MaskZipper.algoToUnzipLambda(aaiMaskToIdxAlgo, fMask, fCompileMaskZipUnzip); 1276 1297 dChildrenTmp = {}; 1277 1298 try: … … 1285 1306 break; 1286 1307 except DecoderNode.TooExpensive: 1287 if uDepth <= 7:1288 self.dprint(uDepth, '!!! %#010x too expensive #4: %#x+child vs %#x' % (fMask, uCostTmp, uCostBest));1308 #if uDepth <= 2: 1309 # self.dprint(uDepth, '!!! %#010x too expensive #4: %#x+child vs %#x' % (fMask, uCostTmp, uCostBest)); 1289 1310 continue; 1290 1311 … … 1300 1321 fChildrenBest = fMask; 1301 1322 dChildrenBest = dChildrenTmp; 1302 elif uDepth <= 7:1303 self.dprint(uDepth, '!!! %#010x too expensive #5: %#x vs %#x' % (fMask, uCostTmp, uCostBest));1323 #elif uDepth <= 2: 1324 # self.dprint(uDepth, '!!! %#010x too expensive #5: %#x vs %#x' % (fMask, uCostTmp, uCostBest)); 1304 1325 1305 1326 # Note that we've covered all the permutations in the given mask. 
… … 1316 1337 #assert fChildrenBest.bit_count() == cChildrenBits; 1317 1338 #assert len(dChildrenBest) <= (1 << cChildrenBits) 1318 if uDepth <= 7:1339 if uDepth <= 2: 1319 1340 self.dprint(uDepth, 1320 '===== Final: fMask=%#010x uCost=%#x TabSize=%#x #Instructions=%u in %u slots...'1321 % (fChildrenBest, uCostBest, 1 << cChildrenBits, cInstructions, len(dChildrenBest)));1341 '===== Final: fMask=%#010x (%u) uCost=%#x #Instructions=%u in %u slots over %u entries...' 1342 % (fChildrenBest, cChildrenBits, uCostBest, cInstructions, len(dChildrenBest), 1 << cChildrenBits)); 1322 1343 1323 1344 # Done. … … 1343 1364 """ 1344 1365 self.oDecoderRoot = DecoderNode(sorted(g_aoAllArmInstructions, 1345 key = operator.attrgetter('fFixedMask', 'fFixedValue', 'sName')) [:32],1366 key = operator.attrgetter('fFixedMask', 'fFixedValue', 'sName')), 1346 1367 0, 0); 1347 1368 self.oDecoderRoot.constructNextLevel(0, sys.maxsize);
Note: See TracChangeset for help on using the changeset viewer.