VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-armv8/bsd-spec-analyze.py@ 108839

Last change on this file since 108839 was 108839, checked in by vboxsync, 6 weeks ago

VMM/IEM: Working on the ARM bsd/opensource spec reader & decoder generator. Work in progress. jiraref:VBP-1598

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 40.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: bsd-spec-analyze.py 108839 2025-04-03 21:20:56Z vboxsync $
4
5"""
6ARM BSD specification analyser.
7"""
8
9from __future__ import print_function;
10
11__copyright__ = \
12"""
13Copyright (C) 2025 Oracle and/or its affiliates.
14
15This file is part of VirtualBox base platform packages, as
16available from https://www.virtualbox.org.
17
18This program is free software; you can redistribute it and/or
19modify it under the terms of the GNU General Public License
20as published by the Free Software Foundation, in version 3 of the
21License.
22
23This program is distributed in the hope that it will be useful, but
24WITHOUT ANY WARRANTY; without even the implied warranty of
25MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26General Public License for more details.
27
28You should have received a copy of the GNU General Public License
29along with this program; if not, see <https://www.gnu.org/licenses>.
30
31SPDX-License-Identifier: GPL-3.0-only
32"""
33__version__ = "$Revision: 108839 $"
34
35# Standard python imports.
36import argparse;
37import ast;
38import collections;
39import datetime;
40import json;
41import operator;
42import os;
43import re;
44import sys;
45import tarfile;
46import traceback;
47
48
49#
50# The ARM instruction AST stuff.
51#
52
class ArmAstBase(object):
    """
    ARM instruction AST base class.

    Besides being the common base of the ArmAstXxxx node classes, this class
    hosts the JSON decoding: fromJson() looks up the '_type' attribute of a
    JSON element in kfnTypeMap and calls the matching fromJsonXxxx factory,
    which validates the attribute names and instantiates the node class.
    """

    kTypeBinaryOp   = 'AST.BinaryOp';
    kTypeBool       = 'AST.Bool';
    kTypeConcat     = 'AST.Concat';
    kTypeFunction   = 'AST.Function';
    kTypeIdentifier = 'AST.Identifier';
    kTypeInteger    = 'AST.Integer';
    kTypeSet        = 'AST.Set';
    kTypeSquareOp   = 'AST.SquareOp';
    kTypeUnaryOp    = 'AST.UnaryOp';
    kTypeValue      = 'Values.Value';

    def __init__(self, sType):
        self.sType = sType;     ##< The node type string (one of the kTypeXxxx values when set by subclasses).

    @staticmethod
    def assertAttribsInSet(oJson, oAttribSet):
        """ Checks that the JSON element has all the attributes in the set and nothing else. """
        assert set(oJson) == oAttribSet, '%s - %s' % (set(oJson) ^ oAttribSet, oJson,);

    kAttribSetBinaryOp = frozenset(['_type', 'left', 'op', 'right']);
    @staticmethod
    def fromJsonBinaryOp(oJson):
        """ Decodes an 'AST.BinaryOp' element, recursing into 'left' and 'right'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetBinaryOp);
        return ArmAstBinaryOp(ArmAstBase.fromJson(oJson['left']), oJson['op'], ArmAstBase.fromJson(oJson['right']));

    kAttribSetUnaryOp = frozenset(['_type', 'op', 'expr']);
    @staticmethod
    def fromJsonUnaryOp(oJson):
        """ Decodes an 'AST.UnaryOp' element, recursing into 'expr'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetUnaryOp);
        return ArmAstUnaryOp(oJson['op'], ArmAstBase.fromJson(oJson['expr']));

    kAttribSetSquareOp = frozenset(['_type', 'var', 'arguments']);
    @staticmethod
    def fromJsonSquareOp(oJson):
        """ Decodes an 'AST.SquareOp' element, recursing into 'var' and each of the 'arguments'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetSquareOp);
        return ArmAstSquareOp(ArmAstBase.fromJson(oJson['var']), [ArmAstBase.fromJson(oArg) for oArg in oJson['arguments']]);

    kAttribSetConcat = frozenset(['_type', 'values']);
    @staticmethod
    def fromJsonConcat(oJson):
        """ Decodes an 'AST.Concat' element, recursing into each of the 'values'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetConcat);
        return ArmAstConcat([ArmAstBase.fromJson(oArg) for oArg in oJson['values']]);

    kAttribSetFunction = frozenset(['_type', 'name', 'arguments']);
    @staticmethod
    def fromJsonFunction(oJson):
        """ Decodes an 'AST.Function' element, recursing into each of the 'arguments'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetFunction);
        return ArmAstFunction(oJson['name'], [ArmAstBase.fromJson(oArg) for oArg in oJson['arguments']]);

    kAttribSetIdentifier = frozenset(['_type', 'value']);
    @staticmethod
    def fromJsonIdentifier(oJson):
        """ Decodes an 'AST.Identifier' element. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetIdentifier);
        return ArmAstIdentifier(oJson['value']);

    kAttribSetBool = frozenset(['_type', 'value']);
    @staticmethod
    def fromJsonBool(oJson):
        """ Decodes an 'AST.Bool' element. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetBool);
        return ArmAstBool(oJson['value']);

    kAttribSetInteger = frozenset(['_type', 'value']);
    @staticmethod
    def fromJsonInteger(oJson):
        """ Decodes an 'AST.Integer' element. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetInteger);
        return ArmAstInteger(oJson['value']);

    kAttribSetSet = frozenset(['_type', 'values']);
    @staticmethod
    def fromJsonSet(oJson):
        """ Decodes an 'AST.Set' element, recursing into each of the 'values'. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetSet);
        return ArmAstSet([ArmAstBase.fromJson(oArg) for oArg in oJson['values']]);

    kAttribSetValue = frozenset(['_type', 'value', 'meaning']);
    @staticmethod
    def fromJsonValue(oJson):
        """ Decodes a 'Values.Value' element; the 'meaning' attribute is required but not retained. """
        ArmAstBase.assertAttribsInSet(oJson, ArmAstBase.kAttribSetValue);
        return ArmAstValue(oJson['value']);

    ## Maps the '_type' attribute value to the decoder function.
    # @note Inside the class body the fromJsonXxxx names are still staticmethod
    #       wrapper objects, and those only became directly callable in Python
    #       3.10.  The underlying functions are stored instead so the dispatch
    #       in fromJson() works on older interpreters too.
    kfnTypeMap = {
        kTypeBinaryOp:      fromJsonBinaryOp.__func__,
        kTypeUnaryOp:       fromJsonUnaryOp.__func__,
        kTypeSquareOp:      fromJsonSquareOp.__func__,
        kTypeConcat:        fromJsonConcat.__func__,
        kTypeFunction:      fromJsonFunction.__func__,
        kTypeIdentifier:    fromJsonIdentifier.__func__,
        kTypeBool:          fromJsonBool.__func__,
        kTypeInteger:       fromJsonInteger.__func__,
        kTypeSet:           fromJsonSet.__func__,
        kTypeValue:         fromJsonValue.__func__,
    };

    @staticmethod
    def fromJson(oJson):
        """
        Decodes an AST/Values expression.

        Returns the ArmAstXxxx instance produced by the decoder registered for
        oJson['_type'].  Raises KeyError if the type is not in kfnTypeMap.
        """
        print('debug ast: %s' % oJson['_type'])
        return ArmAstBase.kfnTypeMap[oJson['_type']](oJson);
154
155
class ArmAstBinaryOp(ArmAstBase):
    """
    AST node for a binary operator expression: left operand, operator string
    and right operand.  The operator must be one of the keys in kdOps.
    """

    kOpTypeCompare      = 'cmp';
    kOpTypeLogical      = 'log';
    kOpTypeArithmetical = 'arit';
    kOpTypeSet          = 'set';

    ## The accepted operators and the category each belongs to.
    kdOps = {
        '||': kOpTypeLogical,
        '&&': kOpTypeLogical,
        '==': kOpTypeCompare,
        '!=': kOpTypeCompare,
        '>':  kOpTypeCompare,
        '>=': kOpTypeCompare,
        '<=': kOpTypeCompare,
        'IN': kOpTypeSet,
        '+':  kOpTypeArithmetical,
    };

    def __init__(self, oLeft, sOp, oRight):
        super(ArmAstBinaryOp, self).__init__(ArmAstBase.kTypeBinaryOp);
        assert sOp in ArmAstBinaryOp.kdOps, 'sOp="%s"' % (sOp,);
        self.oLeft, self.sOp, self.oRight = oLeft, sOp, oRight;
179
180
class ArmAstUnaryOp(ArmAstBase):
    """
    AST node for a unary operator expression: operator string and operand.
    The operator must be one of the keys in kdOps (currently only '!').
    """

    kOpTypeLogical = 'log';

    ## The accepted operators and the category each belongs to.
    kdOps = {
        '!': kOpTypeLogical,
    };

    def __init__(self, sOp, oExpr):
        super(ArmAstUnaryOp, self).__init__(ArmAstBase.kTypeUnaryOp);
        assert sOp in ArmAstUnaryOp.kdOps, 'sOp=%s' % (sOp,);
        self.sOp, self.oExpr = sOp, oExpr;
192
193
class ArmAstSquareOp(ArmAstBase):
    """
    AST node for an 'AST.SquareOp' element: a variable expression ('var' in
    the JSON) together with its list of argument expressions ('arguments').

    The JSON decoder constructs this with two arguments (variable, argument
    list).  The older single argument form taking only the argument list is
    still accepted; oVar is then None.
    """
    def __init__(self, oVar = None, aoValues = None):
        ArmAstBase.__init__(self, ArmAstBase.kTypeSquareOp);
        if aoValues is None:
            # Legacy call style: the only positional argument is the value list.
            aoValues = oVar;
            oVar     = None;
        self.oVar     = oVar;       ##< The decoded 'var' expression, None if not given.
        self.aoValues = aoValues;   ##< The decoded 'arguments' expressions.
198
199
class ArmAstConcat(ArmAstBase):
    """ AST node for an 'AST.Concat' element; holds the list of decoded values. """
    def __init__(self, aoValues):
        super(ArmAstConcat, self).__init__(ArmAstBase.kTypeConcat);
        self.aoValues = aoValues;
204
205
class ArmAstFunction(ArmAstBase):
    """
    AST node for an 'AST.Function' element: a function name and the list of
    decoded argument expressions (stored in aoValues).
    """

    ## What a function name must look like (note: at least two characters).
    s_oReValidName = re.compile('^[_A-Za-z][_A-Za-z0-9]+$');

    def __init__(self, sName, aoArgs):
        super(ArmAstFunction, self).__init__(ArmAstBase.kTypeFunction);
        assert self.s_oReValidName.match(sName), 'sName=%s' % (sName);
        self.sName, self.aoValues = sName, aoArgs;
214
215
class ArmAstIdentifier(ArmAstBase):
    """ AST node for an 'AST.Identifier' element; holds the identifier name. """

    ## What an identifier must look like.
    s_oReValidName = re.compile('^[_A-Za-z][_A-Za-z0-9]*$');

    def __init__(self, sName):
        super(ArmAstIdentifier, self).__init__(ArmAstBase.kTypeIdentifier);
        assert self.s_oReValidName.match(sName), 'sName=%s' % (sName);
        self.sName = sName;
223
224
class ArmAstBool(ArmAstBase):
    """ AST node for an 'AST.Bool' element; the value must be exactly True or False. """
    def __init__(self, fValue):
        super(ArmAstBool, self).__init__(ArmAstBase.kTypeBool);
        assert fValue is True or fValue is False, '%s' % (fValue,);
        self.fValue = fValue;
230
231
class ArmAstInteger(ArmAstBase):
    """ AST node for an 'AST.Integer' element; the value is converted with int(). """
    def __init__(self, iValue):
        super(ArmAstInteger, self).__init__(ArmAstBase.kTypeInteger);
        self.iValue = int(iValue);
236
237
class ArmAstSet(ArmAstBase):
    """ AST node for an 'AST.Set' element; holds the list of decoded member values. """
    def __init__(self, aoValues):
        super(ArmAstSet, self).__init__(ArmAstBase.kTypeSet);
        self.aoValues = aoValues;
242
243
class ArmAstValue(ArmAstBase):
    """ AST node for a 'Values.Value' element; holds the raw 'value' attribute. """
    def __init__(self, sValue):
        super(ArmAstValue, self).__init__(ArmAstBase.kTypeValue);
        self.sValue = sValue;
248
249
250#
251# Instructions and their properties.
252#
253
class ArmEncodesetField(object):
    """
    ARM Encodeset.Bits & Encodeset.Field.

    Describes one bit range of an instruction encoding: where it starts, how
    wide it is, which of its bits are fixed (fFixed) and what values the
    fixed bits have (fValue).  Both fFixed and fValue are relative to bit 0
    of the field, i.e. not shifted by iFirstBit.
    """
    def __init__(self, oJson, iFirstBit, cBitsWidth, fFixed, fValue, sName = None):
        self.oJson      = oJson;
        self.iFirstBit  = iFirstBit;
        self.cBitsWidth = cBitsWidth;
        self.fFixed     = fFixed;
        self.fValue     = fValue;
        self.sName      = sName;    ##< None if Encodeset.Bits.

    def __str__(self):
        iLastBit = self.iFirstBit + self.cBitsWidth - 1;
        sRet     = '[%2u:%-2u] = %#x/%#x/%#x' % (iLastBit, self.iFirstBit, self.fValue, self.fFixed, self.getMask(),);
        if not self.sName:
            return sRet;
        return sRet + ' # %s' % (self.sName,);

    def __repr__(self):
        return self.__str__();

    def getMask(self):
        """ Field mask (unshifted). """
        return (1 << self.cBitsWidth) - 1;

    def getShiftedMask(self):
        """ Field mask, shifted. """
        return ((1 << self.cBitsWidth) - 1) << self.iFirstBit;

    @staticmethod
    def fromJson(oJson):
        """
        Creates a field from an 'Instruction.Encodeset.Field' or
        'Instruction.Encodeset.Bits' JSON element.

        The value is a quoted string with one character per bit, most
        significant bit first: '1' and '0' are fixed bits, 'x' is a bit that
        is not fixed.
        """
        sType = oJson['_type'];
        assert sType in ('Instruction.Encodeset.Field', 'Instruction.Encodeset.Bits'), oJson['_type'];

        oRange = oJson['range'];
        assert oRange['_type'] == 'Range';
        iFirstBit  = int(oRange['start']);
        cBitsWidth = int(oRange['width']);

        sValue = oJson['value']['value'];
        assert sValue[0] == '\'' and sValue[-1] == '\'', sValue;
        sValue = sValue[1:-1];
        assert len(sValue) == cBitsWidth, 'cBitsWidth=%s sValue=%s' % (cBitsWidth, sValue,);

        # Shift in one bit per character, starting with the most significant one.
        fFixed = 0;
        fValue = 0;
        for ch in sValue:
            assert ch in 'x10', 'ch=%s' % ch;
            fFixed = (fFixed << 1) | (0 if ch == 'x' else 1);
            fValue = (fValue << 1) | (1 if ch == '1' else 0);

        sName = None;
        if oJson['_type'] == 'Instruction.Encodeset.Field':
            sName = oJson['name'];
        return ArmEncodesetField(oJson, iFirstBit, cBitsWidth, fFixed, fValue, sName);

    @staticmethod
    def fromJsonEncodeset(oJson, aoSet, fCovered):
        """
        Adds the fields of an 'Instruction.Encodeset.Encodeset' JSON element
        to aoSet, skipping any field whose bits are all covered already.

        fCovered is the mask of instruction bits covered so far.  Returns the
        tuple (aoSet, fCovered) with the updated list and mask.
        """
        assert oJson['_type'] == 'Instruction.Encodeset.Encodeset', oJson['_type'];
        for oJsonField in oJson['values']:
            oField     = ArmEncodesetField.fromJson(oJsonField);
            fFieldMask = oField.getShiftedMask();
            if (fFieldMask & fCovered) == fFieldMask:
                continue;   # Nothing new in this one.
            aoSet.append(oField)
            fCovered |= fFieldMask;
        return (aoSet, fCovered);
324
325
class ArmInstruction(object):
    """
    ARM instruction.

    Holds the JSON element the instruction was created from, its name,
    mnemonic, encoding fields and condition expression.  The constructor
    folds the fixed bits of all the fields into fFixedMask / fFixedValue.
    """

    ## What an instruction name must look like (note: at least two characters).
    s_oReValidName = re.compile('^[_A-Za-z][_A-Za-z0-9]+$');

    def __init__(self, oJson, sName, sMemonic, aoEncodesets, oCondition):
        assert self.s_oReValidName.match(sName), 'sName=%s' % (sName);

        # Combine the fixed bits and their values from all the fields.
        fFixedMask  = 0;
        fFixedValue = 0;
        for oField in aoEncodesets:
            fFixedMask  |= oField.fFixed << oField.iFirstBit;
            fFixedValue |= oField.fValue << oField.iFirstBit;

        self.oJson        = oJson;
        self.sName        = sName;
        self.sMnemonic    = sMemonic;
        self.sAsmDisplay  = '';         ##< Assembly syntax for display; starts out empty.
        self.aoEncodesets = aoEncodesets;
        self.oCondition   = oCondition;
        self.fFixedMask   = fFixedMask;
        self.fFixedValue  = fFixedValue;

        # State related to decoder.
        self.fDecoderLeafCheckNeeded = False;   ##< Whether we need to check fixed value/mask in leaf decoder functions.

    def __str__(self):
        sEncoding = ',\n '.join(str(oField) for oField in self.aoEncodesets);
        return 'sName=%s; sMnemonic=%s fFixedValue/Mask=%#x/%#x encoding=\n %s' % (
            self.sName, self.sMnemonic, self.fFixedValue, self.fFixedMask, sEncoding,
        );

    def __repr__(self):
        return self.__str__();

    def getCName(self):
        """
        Returns the name to use in generated C identifiers.

        A trailing underscore is dropped as it seems pointless, unless the
        shortened name would clash with another instruction's name.
        """
        sName = self.sName;
        if sName[-1] == '_' and sName[:-1] not in g_dAllArmInstructionsByName:
            return sName[:-1];
        return sName;
364
365
366#
367# AArch64 Specification Loader.
368#
369
## All the instructions (appended to by parseInstructions).
g_aoAllArmInstructions = [] # type: List[ArmInstruction]

## All the instructions by name (not mnemonic).
g_dAllArmInstructionsByName = {} # type: Dict[str, ArmInstruction]
375
376#
377# Pass #1 - Snoop up all the instructions and their encodings.
378#
def parseInstructions(aoStack, aoJson):
    """
    Recursively walks the instruction sets and groups in aoJson, creating an
    ArmInstruction for each 'Instruction.Instruction' element and registering
    it in g_aoAllArmInstructions and g_dAllArmInstructionsByName.

    aoStack holds the enclosing set/group JSON elements, innermost first.
    Their encodings are merged into the instruction's own after it, so bits
    already covered by a closer level are not added again.  Elements of any
    other type are ignored.  Always returns True.
    """
    for oJson in aoJson:
        sType = oJson['_type'];
        if sType in ("Instruction.InstructionSet", "Instruction.InstructionGroup"):
            parseInstructions([oJson,] + aoStack, oJson['children']);
        elif sType == "Instruction.Instruction":
            # The instruction's own encoding first, then the parents' from the inside out.
            (aoEncodesets, fCovered) = ArmEncodesetField.fromJsonEncodeset(oJson['encoding'], [], 0);
            for oParent in aoStack:
                if 'encoding' not in oParent:
                    continue;
                (aoEncodesets, fCovered) = ArmEncodesetField.fromJsonEncodeset(oParent['encoding'], aoEncodesets, fCovered);

            oCondition = ArmAstBase.fromJson(oJson['condition']);
            oInstr     = ArmInstruction(oJson, oJson['name'], oJson['name'], aoEncodesets, oCondition);

            g_aoAllArmInstructions.append(oInstr);
            assert oInstr.sName not in g_dAllArmInstructionsByName;
            g_dAllArmInstructionsByName[oInstr.sName] = oInstr;
    return True;
397
398#
399# Pass #2 - Assembly syntax formatting (for display purposes)
400#
def asmSymbolsToDisplayText(adSymbols, ddAsmRules, oInstr):
    """
    Converts a list of assembly symbols into display text.

    Literals contribute their value and rule references the display text of
    the rule they refer to.  Any other symbol type raises an Exception.
    """
    asParts = [];
    for dSym in adSymbols:
        sType = dSym['_type'];
        if sType == 'Instruction.Symbols.Literal':
            asParts.append(dSym['value']);
        elif sType == 'Instruction.Symbols.RuleReference':
            asParts.append(asmRuleIdToDisplayText(dSym['rule_id'], ddAsmRules, oInstr));
        else:
            raise Exception('%s: Unknown assembly symbol type: %s' % (oInstr.sMnemonic, sType,));
    return ''.join(asParts);
413
def asmChoicesFilterOutDefaultAndAbsent(adChoices, ddAsmRules):
    """
    Returns the choice list with a trailing None entry and the first
    'absent'/'default' choice removed.

    A choice counts as absent/default when every one of its symbols is a
    rule reference to a rule that has neither display text nor symbols.
    Nothing is removed when there is only a single choice.  The input list is
    not modified; an empty list is returned as-is.
    """
    # There is sometimes a 'none' tail entry.  (Check for an empty list first.)
    if adChoices and adChoices[-1] is None:
        adChoices = adChoices[:-1];

    if len(adChoices) > 1:
        # Typically, one of the choices is 'absent' or 'default', eliminate it before we start...
        for iChoice, dChoice in enumerate(adChoices):
            for dSymbol in dChoice['symbols']:
                if dSymbol['_type'] != 'Instruction.Symbols.RuleReference':
                    break;
                oRule = ddAsmRules[dSymbol['rule_id']];
                if oRule.get('display') or oRule.get('symbols'):
                    break;
            else:
                # No symbol with any content was found, so drop this choice.
                return adChoices[:iChoice] + adChoices[iChoice + 1:];
    return adChoices;
435
def asmRuleIdToDisplayText(sRuleId, ddAsmRules, oInstr):
    """
    Returns the display text for the assembly rule with the given ID.

    Token rules yield their 'default' text and plain rules their 'display'
    text.  Choice rules yield their 'display' text when set; otherwise the
    text is assembled as '{choice | choice ...}' from the choices (minus any
    absent/default one) and cached in the rule's 'display' entry.  Any other
    rule type raises an Exception.
    """
    dRule     = ddAsmRules[sRuleId];
    sRuleType = dRule['_type'];

    if sRuleType == 'Instruction.Rules.Token':
        assert dRule['default'], '%s: %s' % (oInstr.sMnemonic, sRuleId);
        return dRule['default'];

    if sRuleType == 'Instruction.Rules.Rule':
        assert dRule['display'], '%s: %s' % (oInstr.sMnemonic, sRuleId);
        return dRule['display'];

    if sRuleType != 'Instruction.Rules.Choice':
        raise Exception('%s: Unknown assembly rule type: %s for %s' % (oInstr.sMnemonic, sRuleType, sRuleId));

    # Some of these have display = None and we need to sort it out ourselves.
    if dRule['display']:
        return dRule['display'];

    assert len(dRule['choices']) > 1;
    asChoices = [];
    for dChoice in asmChoicesFilterOutDefaultAndAbsent(dRule['choices'], ddAsmRules):
        asChoices.append(asmSymbolsToDisplayText(dChoice['symbols'], ddAsmRules, oInstr));
    sText = '{' + ' | '.join(asChoices) + '}';

    # Cache it.
    dRule['display'] = sText;
    return sText;
462
def parseInstructionsPass2(aoInstructions, ddAsmRules):
    """
    Uses the assembly rules to construct some assembly syntax string for each
    instruction in the array.

    The result is stored in the sAsmDisplay attribute.  Instructions without
    an 'assembly' entry in their JSON get the mnemonic instead.  Always
    returns True.
    """
    for oInstr in aoInstructions:
        if 'assembly' not in oInstr.oJson:
            oInstr.sAsmDisplay = oInstr.sMnemonic;
            continue;

        oAsm = oInstr.oJson['assembly'];
        assert oAsm['_type'] == 'Instruction.Assembly';
        assert 'symbols' in oAsm;
        oInstr.sAsmDisplay = asmSymbolsToDisplayText(oAsm['symbols'], ddAsmRules, oInstr);
    return True;
477
def LoadArmOpenSourceSpecification(oOptions):
    """
    Loads the instruction specification JSON and populates the global
    instruction list and dictionary from it.

    The file is read from the tar archive oOptions.sTarFile if given,
    otherwise from the file system.  In the latter case relative file names
    in oOptions are first made relative to oOptions.sSpecDir when that is set.
    Only the instruction file is loaded at present; the features and
    registers files are not read yet.

    After parsing, the instruction list is sorted by name and assembly
    display text and some statistics are printed.  Always returns True.
    """
    global g_aoAllArmInstructions;

    #
    # Load the files.
    #
    print("loading specs ...");
    if oOptions.sTarFile:
        with tarfile.open(oOptions.sTarFile, 'r') as oTarFile:
            with oTarFile.extractfile(oOptions.sFileInstructions) as oFile:
                dRawInstructions = json.load(oFile);
    else:
        if oOptions.sSpecDir:
            # Prefix the relative file names with the specification directory.
            for sAttr in ('sFileInstructions', 'sFileFeatures', 'sFileRegisters'):
                sPath = getattr(oOptions, sAttr);
                if not os.path.isabs(sPath):
                    setattr(oOptions, sAttr, os.path.normpath(os.path.join(oOptions.sSpecDir, sPath)));

        with open(oOptions.sFileInstructions, 'r', encoding = 'utf-8') as oFile:
            dRawInstructions = json.load(oFile);
    print("... done loading.");

    #
    # Parse the Instructions.
    #
    print("parsing instructions ...");
    # Pass #1: Collect the instructions.
    parseInstructions([], dRawInstructions['instructions']);
    # Pass #2: Assembly syntax.
    parseInstructionsPass2(g_aoAllArmInstructions, dRawInstructions['assembly_rules']);

    # Sort the instruction array by name.
    g_aoAllArmInstructions = sorted(g_aoAllArmInstructions, key = operator.attrgetter('sName', 'sAsmDisplay'));

    print("Found %u instructions." % (len(g_aoAllArmInstructions),));

    # List all the instructions.
    for oInstr in g_aoAllArmInstructions:
        print('%08x/%08x %s %s' % (oInstr.fFixedMask, oInstr.fFixedValue, oInstr.getCName(), oInstr.sAsmDisplay));

    # Gather stats on fixed bits:
    dPopCounts = collections.Counter(bin(oInstr.fFixedMask).count('1') for oInstr in g_aoAllArmInstructions);

    print('');
    print('Fixed bit pop count distribution:');
    for cBits in range(33):
        if cBits in dPopCounts:
            print(' %2u: %u' % (cBits, dPopCounts[cBits]));

    # Top 20 fixed masks.
    dMaskCounts = collections.Counter(oInstr.fFixedMask for oInstr in g_aoAllArmInstructions);

    print('');
    print('Top 20 fixed masks:');
    for fFixedMask, cHits in dMaskCounts.most_common(20):
        print(' %#x: %u times' % (fFixedMask, cHits,));

    return True;
554
555
556#
557# Decoder structure helpers.
558#
559
class MaskIterator(object):
    """
    Helper class for DecoderNode.constructNextLevel().

    Iterating an instance yields (fMask, aiBits, aaiMaskAlgo) tuples, one per
    usable variation of the mask given to the constructor: fMask is the
    variation, aiBits lists the numbers of the bits set in it (ascending) and
    aaiMaskAlgo is the DecoderNode.compactMaskAsList() result for those bits.
    """

    ## Maximum number of mask sub-parts.
    # Lower number means fewer instructions required to convert it into an index.
    kcMaxMaskParts = 3

    def __init__(self, fMask, cMaxTableSizeInBits, dDictDoneAlready):
        self.fMask        = fMask;
        self.afVariations = self.variationsForMask(fMask, cMaxTableSizeInBits, dDictDoneAlready);

    def __iter__(self):
        ## @todo make this more dynamic...
        return iter(self.afVariations);

    @staticmethod
    def variationsForMask(fMask, cMaxTableSizeInBits, dDictDoneAlready):
        """
        Returns the list of usable variations (sub-masks) of fMask.

        A variation is fMask itself or fMask with one or more bits cleared
        (only the low 32 bits are considered).  It is usable when it has at
        least one and at most cMaxTableSizeInBits bits set, compacts into no
        more than kcMaxMaskParts parts and is not in dDictDoneAlready.  Each
        usable variation is added to dDictDoneAlready, so the same dictionary
        can be passed to several calls to avoid reporting a mask twice.

        The mask and the bit list of a variation are always derived together,
        so the members of each returned tuple describe the same set of bits.
        """
        aiBits = [iBit for iBit in range(32) if fMask & (1 << iBit)];
        if not aiBits:
            return [];

        aiRet      = [];
        setVisited = set();     # The sub-masks examined by this call, usable or not.

        def recursive(fSubMask, aiSubBits, fRoot):
            # Masks done by an earlier call are skipped along with everything
            # below them, except at the root where we still try the sub-masks.
            fDone = fSubMask in dDictDoneAlready;
            if fSubMask in setVisited or (fDone and not fRoot):
                return;
            setVisited.add(fSubMask);

            if not fDone and len(aiSubBits) <= cMaxTableSizeInBits:
                aaiMaskAlgo = DecoderNode.compactMaskAsList(aiSubBits);
                if len(aaiMaskAlgo) <= MaskIterator.kcMaxMaskParts:
                    dDictDoneAlready[fSubMask] = 1;
                    aiRet.append((fSubMask, list(aiSubBits), aaiMaskAlgo));

            # Try again with each of the bits removed, taking the bit out of
            # both the mask and the bit list so the two stay in sync.
            if len(aiSubBits) > 1:
                for idx, iBit in enumerate(aiSubBits):
                    recursive(fSubMask & ~(1 << iBit), aiSubBits[:idx] + aiSubBits[idx + 1:], False);

        recursive(fMask, aiBits, True);

        print("debug: fMask=%#x len(aiRet)=%d" % (fMask, len(aiRet),));
        return aiRet;
615
class DecoderNode(object):
    """
    A node in the instruction decoder tree.

    A node holds the instructions that are still candidates once the opcode
    bits in fCheckedMask have been found to equal fCheckedValue.
    constructNextLevel() tries to split the instructions into child nodes by
    indexing on further opcode bits.
    """

    ## The absolute maximum table size in bits index by the log2 of the instruction count.
    kacMaxTableSizesInBits = (
        2,      # [2^0 =     1] =>     4
        4,      # [2^1 =     2] =>    16
        5,      # [2^2 =     4] =>    32
        6,      # [2^3 =     8] =>    64
        7,      # [2^4 =    16] =>   128
        7,      # [2^5 =    32] =>   128
        8,      # [2^6 =    64] =>   256
        9,      # [2^7 =   128] =>   512
        10,     # [2^8 =   256] =>  1024
        11,     # [2^9 =   512] =>  2048
        12,     # [2^10 = 1024] =>  4096
        13,     # [2^11 = 2048] =>  8192
        14,     # [2^12 = 4096] => 16384
        14,     # [2^13 = 8192] => 16384
        15,     # [2^14 =16384] => 32768
    );

    # Note! The instruction list annotation is a string so it is not evaluated
    #       when the class is defined.
    def __init__(self, aoInstructions: 'list[ArmInstruction]', fCheckedMask: int, fCheckedValue: int, uDepth: int):
        # The checked value must not have bits outside the checked mask, and no
        # instruction may have a fixed bit contradicting what was checked.
        assert (~fCheckedMask & fCheckedValue) == 0;
        for idxInstr, oInstr in enumerate(aoInstructions):
            assert ((oInstr.fFixedValue ^ fCheckedValue) & fCheckedMask & oInstr.fFixedMask) == 0, \
                   '%s: fFixedValue=%#x fFixedMask=%#x fCheckedValue=%#x fCheckedMask=%#x -> %#x\n %s' \
                   % (idxInstr, oInstr.fFixedValue, oInstr.fFixedMask, fCheckedValue, fCheckedMask,
                      (oInstr.fFixedValue ^ fCheckedValue) & fCheckedMask & oInstr.fFixedMask,
                      '\n '.join(['%s: %#010x/%#010x %s' % (i, oInstr2.fFixedValue, oInstr2.fFixedMask, oInstr2.sName)
                                  for i, oInstr2 in enumerate(aoInstructions[:idxInstr+2])]));

        self.aoInstructions   = aoInstructions;     ##< The instructions at this level.
        self.fCheckedMask     = fCheckedMask;       ##< The opcode bit mask covered thus far.
        self.fCheckedValue    = fCheckedValue;      ##< The value that goes with fCheckedMask.
        self.uDepth           = uDepth;             ##< The current node depth.
        self.uCost            = 0;                  ##< The cost at this level.
        self.fLeafCheckNeeded = len(aoInstructions) == 1 and (aoInstructions[0].fFixedMask & ~self.fCheckedMask) != 0;
        self.fChildMask       = 0;                  ##< The mask used to separate the children.
        self.aoChildren       = [];                 ##< Children, populated by constructNextLevel().

    @staticmethod
    def compactMask(fMask):
        """
        Returns an array with instructions for extracting the bits from the
        mask into a compacted form. Each array entry is an array/tuple of
        source bit [0], destination bit [1], and bit counts [2].
        """
        aaiAlgo = [];
        iSrcBit = 0;
        iDstBit = 0;
        while fMask > 0:
            if fMask & 1:
                # Consume the whole run of consecutive set bits.
                cCount = 1
                fMask >>= 1;
                while fMask & 1:
                    fMask >>= 1;
                    cCount += 1
                aaiAlgo.append([iSrcBit, iDstBit, cCount])
                iSrcBit += cCount;
                iDstBit += cCount;
            else:
                # Skip the clear bit.  The mask must be shifted here too, or
                # the loop never terminates.
                fMask >>= 1;
                iSrcBit += 1;
        return aaiAlgo;

    @staticmethod
    def compactMaskAsList(dOrderedDict):
        """
        Returns an array with instructions for extracting the bits from the
        mask into a compacted form. Each array entry is an array/tuple of
        source bit [0], destination bit [1], and mask (shifted to pos 0) [2].

        The input is indexed by position, so despite the parameter name it is
        expected to be a list of bit numbers in ascending order.
        """
        aaiAlgo = [];
        iDstBit = 0;
        i       = 0;
        while i < len(dOrderedDict):
            iSrcBit = dOrderedDict[i];
            cCount  = 1;
            i      += 1;
            # Extend the run while the following bit numbers are consecutive.
            while i < len(dOrderedDict) and dOrderedDict[i] == iSrcBit + cCount:
                cCount += 1;
                i      += 1;
            aaiAlgo.append([iSrcBit, iDstBit, (1 << cCount) - 1])
            iDstBit += cCount;
        return aaiAlgo;

    @staticmethod
    def compactDictAlgoToLambda(aaiAlgo):
        """
        Returns a function converting an opcode value into a compacted index
        according to a compactMaskAsList() result.
        """
        assert(aaiAlgo)
        sBody = '';
        for iSrcBit, iDstBit, fMask in aaiAlgo:
            if sBody:
                sBody += ' | ';
            assert iSrcBit >= iDstBit;
            if iDstBit == 0:
                if iSrcBit == 0:
                    sBody += '(uValue & %#x)' % (fMask,);
                else:
                    sBody += '((uValue >> %u) & %#x)' % (iSrcBit, fMask);
            else:
                sBody += '((uValue >> %u) & %#x)' % (iSrcBit - iDstBit, fMask << iDstBit);
        # The expression consists solely of the integers formatted above.
        return eval('lambda uValue: ' + sBody);

    @staticmethod
    def compactDictAlgoToLambdaRev(aaiAlgo):
        """
        Returns a function converting a compacted index back into the opcode
        bits it was made from, according to a compactMaskAsList() result.
        """
        assert(aaiAlgo)
        sBody = '';
        for iSrcBit, iDstBit, fMask in aaiAlgo:
            if sBody:
                sBody += ' | ';
            if iDstBit == 0:
                if iSrcBit == 0:
                    sBody += '(uIdx & %#x)' % (fMask,);
                else:
                    sBody += '((uIdx & %#x) << %u)' % (fMask, iSrcBit);
            else:
                sBody += '((uIdx << %u) & %#x)' % (iSrcBit - iDstBit, fMask << iSrcBit);
        # The expression consists solely of the integers formatted above.
        return eval('lambda uIdx: ' + sBody);

    @staticmethod
    def toIndexByMask(uValue, aaiAlgo):
        """ Converts an opcode value into a compacted index according to a compactMaskAsList() result. """
        idxRet = 0;
        for iSrcBit, iDstBit, fMask in aaiAlgo:
            idxRet |= ((uValue >> iSrcBit) & fMask) << iDstBit;
        return idxRet;

    @staticmethod
    def popCount(uValue):
        """ Returns the number of bits set in the (non-negative) value. """
        cBits = 0;
        while uValue:
            cBits += 1;
            uValue &= uValue - 1;   # Clears the lowest set bit.
        return cBits;

    def constructNextLevel(self):
        """
        Recursively constructs the next level(s) of the decoder tree below
        this node and returns the cost.

        For a leaf (at most one instruction) the cost is 16 if the leaf needs
        to check remaining fixed bits and 0 otherwise.  For other nodes the
        candidate masks are tried out brute force style and the lowest sum of
        child costs is returned, with that mask and its children stored in
        fChildMask and aoChildren.  If no candidate mask is found, the initial
        value 0x7fffffffffffffff is returned and the node is left childless.
        """
        # Special case: leaf.
        if len(self.aoInstructions) <= 1:
            assert len(self.aoChildren) == 0;
            return 16 if self.fLeafCheckNeeded else 0;

        # Do an inventory of the fixed masks used by the instructions.
        dMaskCounts = collections.Counter();
        for oInstr in self.aoInstructions:
            dMaskCounts[oInstr.fFixedMask & ~self.fCheckedMask] += 1;
        assert 0 not in dMaskCounts or dMaskCounts[0] <= 1, \
               'dMaskCounts=%s len(self.aoInstructions)=%s\n%s' % (dMaskCounts, len(self.aoInstructions),self.aoInstructions);

        # Work thru the possible masks and test out the possible variations (brute force style).
        uCostBest        = 0x7fffffffffffffff;
        fChildrenBest    = 0;
        aoChildrenBest   = None;
        dDictDoneAlready = {}
        for fOrgMask, cOccurences in dMaskCounts.most_common(8):
            # Determine the max table size for the number of instructions using this mask.
            cOccurencesAsShift = 1;
            while (1 << cOccurencesAsShift) < cOccurences:
                cOccurencesAsShift += 1;
            cMaxTableSizeInBits = self.kacMaxTableSizesInBits[cOccurencesAsShift]; # Not quite sure about this...
            print('debug: %#010x (%u) - %u instructions - max tab size %u'
                  % (fOrgMask, self.popCount(fOrgMask), cOccurences, cMaxTableSizeInBits,));

            # Skip pointless stuff.
            if cOccurences >= 2 and fOrgMask > 0 and fOrgMask != 0xffffffff:

                # Brute force all the mask variations (minus those which are too wide).
                for fMask, dOrderedDictMask, aaiMaskToIdxAlgo in MaskIterator(fOrgMask, cMaxTableSizeInBits, dDictDoneAlready):
                    print('debug: >>> fMask=%#010x...' % (fMask,));
                    assert len(dOrderedDictMask) <= cMaxTableSizeInBits;
                    fnToIndex   = self.compactDictAlgoToLambda(aaiMaskToIdxAlgo);
                    fnFromIndex = self.compactDictAlgoToLambdaRev(aaiMaskToIdxAlgo);

                    # Distribute the instructions over the table entries.
                    aaoTmp = [[] for _ in range(1 << len(dOrderedDictMask))];
                    for oInstr in self.aoInstructions:
                        idx = fnToIndex(oInstr.fFixedValue);
                        assert idx == self.toIndexByMask(oInstr.fFixedValue & fMask, aaiMaskToIdxAlgo);
                        print('debug: %#010x -> %#05x %s' % (oInstr.fFixedValue, idx, oInstr.sName));
                        aaoTmp[idx].append(oInstr);

                    # Create a child for each table entry and sum up the costs.
                    aoChildrenTmp = [];
                    uCostTmp      = 0;
                    for idx, aoInstrs in enumerate(aaoTmp):
                        oChild = DecoderNode(aoInstrs,
                                             self.fCheckedMask  | fMask,
                                             self.fCheckedValue | fnFromIndex(idx),
                                             self.uDepth + 1);
                        aoChildrenTmp.append(oChild);
                        uCostTmp += oChild.constructNextLevel();

                    if uCostTmp < uCostBest:
                        uCostBest      = uCostTmp;
                        fChildrenBest  = fMask;
                        aoChildrenBest = aoChildrenTmp;

        # Keep the best split, so the work above isn't thrown away.
        if aoChildrenBest is not None:
            self.fChildMask = fChildrenBest;
            self.aoChildren = aoChildrenBest;
        ## @todo deal with the case where no usable mask was found.
        return uCostBest;
824
825
826
827
828
829
830
831
832
833#
834# Generators
835#
836
837class IEMArmGenerator(object):
838
def __init__(self):
    """ Starts out without a decoder tree; constructDecoder() creates it. """
    self.oDecoderRoot = None;
841
842
def constructDecoder(self):
    """
    Creates the decoder to the best of our abilities.

    The root node covers all the loaded instructions with no opcode bits
    checked yet; the rest of the tree is constructed from there.
    """
    oRoot = DecoderNode(g_aoAllArmInstructions, 0, 0, 0);
    self.oDecoderRoot = oRoot;
    oRoot.constructNextLevel();
850
851
def generateLicenseHeader(self):
    """
    Returns the lines for a license header.

    This is the 'autogenerated' notice followed by the copyright and license
    comment (the copyright range ends with the current year) and three empty
    lines.
    """
    sCopyright = ' * Copyright (C) 2025-' + str(datetime.date.today().year) + ' Oracle and/or its affiliates.';

    asNotice = [
        '/*',
        ' * Autogenerated by $Id: bsd-spec-analyze.py 108839 2025-04-03 21:20:56Z vboxsync $ ',
        ' * Do not edit!',
        ' */',
        '',
    ];
    asLicense = [
        '/*',
        sCopyright,
        ' *',
        ' * This file is part of VirtualBox base platform packages, as',
        ' * available from https://www.virtualbox.org.',
        ' *',
        ' * This program is free software; you can redistribute it and/or',
        ' * modify it under the terms of the GNU General Public License',
        ' * as published by the Free Software Foundation, in version 3 of the',
        ' * License.',
        ' *',
        ' * This program is distributed in the hope that it will be useful, but',
        ' * WITHOUT ANY WARRANTY; without even the implied warranty of',
        ' * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU',
        ' * General Public License for more details.',
        ' *',
        ' * You should have received a copy of the GNU General Public License',
        ' * along with this program; if not, see <https://www.gnu.org/licenses>.',
        ' *',
        ' * The contents of this file may alternatively be used under the terms',
        ' * of the Common Development and Distribution License Version 1.0',
        ' * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included',
        ' * in the VirtualBox distribution, in which case the provisions of the',
        ' * CDDL are applicable instead of those of the GPL.',
        ' *',
        ' * You may elect to license modified versions of this file under the',
        ' * terms and conditions of either the GPL or the CDDL or both.',
        ' *',
        ' * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0',
        ' */',
    ];
    return asNotice + asLicense + ['', '', '',];
896
def generateImplementationStubs(self):
    """
    Generate implementation stubs.

    Nothing is generated at present, so the returned list is empty.
    """
    asLines = [];
    return asLines;
902
903
def generateDecoderFunctions(self):
    """
    Generates the leaf decoder functions.

    Returns the lines of C code defining one iemDecode_<name> function per
    instruction in g_aoAllArmInstructions.  Each function logs the
    instruction and either calls iemImpl_<name> (when HAS_IMPL_<name> is
    defined) or returns VERR_IEM_INSTR_NOT_IMPLEMENTED.  Instructions flagged
    with fDecoderLeafCheckNeeded first get a check of the fixed opcode bits.
    """
    asLines = [];
    for oInstr in g_aoAllArmInstructions:
        sCName = oInstr.getCName();
        asLines.extend([
            '',
            '/* %08x/%08x: %s */' % (oInstr.fFixedMask, oInstr.fFixedValue, oInstr.sAsmDisplay,),
            'FNIEMOP_DEF_1(iemDecode_%s, uint32_t, uOpcode)' % (sCName,),
            '{',
        ]);

        # The final decoding step, if needed.
        if oInstr.fDecoderLeafCheckNeeded:
            asLines.extend([
                ' if ((uOpcode & %#x) == %#x) { /* likely */ }' % (oInstr.fFixedMask, oInstr.fFixedValue,),
                ' else',
                ' {',
                # This line isn't %-formatted here, so the C format specifiers
                # use single percent signs, and the newline must be emitted
                # as the two character C escape sequence.
                ' LogFlow(("Invalid instruction %#x at %x\\n", uOpcode, pVCpu->cpum.GstCtx.Pc.u64));',
                ' return IEMOP_RAISE_INVALID_OPCODE_RET();',
                ' }',
            ]);

        # Decode the fields and prepare for passing them as arguments.
        asArgs  = [];
        sLogFmt = '';
        ## @todo Most of this should be done kept in the instruction.

        # Each log argument must be preceded by a comma as they follow the format string.
        sLogArgs = ''.join([', ' + sArg for sArg in asArgs]);

        asLines.extend([
            ' LogFlow(("%s%s\\n"%s));' % (sCName, sLogFmt, sLogArgs,),
            '#ifdef HAS_IMPL_%s' % (sCName,),
            ' return iemImpl_%s(%s);' % (sCName, ', '.join(asArgs),),
            '#else',
            ' RT_NOREF(%s);' % (', '.join(asArgs) if asArgs else 'uOpcode') ,
            ' return VERR_IEM_INSTR_NOT_IMPLEMENTED;',
            '#endif',
            '}',
        ]);
    return asLines;
945
946
def generateDecoderCpp(self, iPartNo):
    """
    Generates the decoder data & code.

    The output is the license header, followed by the log group / include
    preamble and finally the leaf decoder functions.  The part number is
    currently not used.

    Returns a (success indicator, list of source lines) tuple; the success
    indicator is always True.
    """
    _ = iPartNo;

    # Defines and includes going between the license header and the code.
    asPreamble = [
        '#define LOG_GROUP LOG_GROUP_IEM',
        '#define VMCPU_INCL_CPUM_GST_CTX',
        '#include "IEMInternal.h"',
        '#include "vm.h"',
        '',
        '#include "iprt/armv8.h"',
        '',
        '',
    ];

    asLines = self.generateLicenseHeader();
    asLines.extend(asPreamble);
    asLines.extend(self.generateDecoderFunctions());
    return (True, asLines);
966
967
def main(self, asArgs):
    """
    Main function.

    Parses the command line given in asArgs (the first entry, i.e. the
    program name, is skipped), loads the specification, constructs the
    decoder and writes out the generated files.  An output file name of '-'
    means standard output.

    Returns 0 on success and 1 on failure (specification load failure,
    output file open failure, or a generator method reporting failure).
    Exceptions raised by the generator methods or while writing propagate
    to the caller after the output file has been closed.
    """

    #
    # Parse arguments.
    #
    oArgParser = argparse.ArgumentParser(add_help = False);
    oArgParser.add_argument('--tar',
                            metavar = 'AARCHMRS_BSD_A_profile-2024-12.tar.gz',
                            dest    = 'sTarFile',
                            action  = 'store',
                            default = None,
                            help    = 'Specification TAR file to get the files from.');
    oArgParser.add_argument('--instructions',
                            metavar = 'Instructions.json',
                            dest    = 'sFileInstructions',
                            action  = 'store',
                            default = 'Instructions.json',
                            help    = 'The path to the instruction specification file.');
    oArgParser.add_argument('--features',
                            metavar = 'Features.json',
                            dest    = 'sFileFeatures',
                            action  = 'store',
                            default = 'Features.json',
                            help    = 'The path to the features specification file.');
    oArgParser.add_argument('--registers',
                            metavar = 'Registers.json',
                            dest    = 'sFileRegisters',
                            action  = 'store',
                            default = 'Registers.json',
                            help    = 'The path to the registers specification file.');
    oArgParser.add_argument('--spec-dir',
                            metavar = 'dir',
                            dest    = 'sSpecDir',
                            action  = 'store',
                            default = '',
                            help    = 'Specification directory to prefix the specification files with.');
    oArgParser.add_argument('--out-decoder',
                            metavar = 'file-decoder.cpp',
                            dest    = 'sFileDecoderCpp',
                            action  = 'store',
                            default = '-',
                            help    = 'The output C++ file for the decoder.');
    oOptions = oArgParser.parse_args(asArgs[1:]);

    #
    # Load the specification.
    #
    if not LoadArmOpenSourceSpecification(oOptions):
        return 1;

    #
    # Sort out the decoding.
    #
    self.constructDecoder();

    #
    # Output.
    #
    aaoOutputFiles = [
        ( oOptions.sFileDecoderCpp, self.generateDecoderCpp, 0, ),
    ];
    fRc = True;
    for sOutFile, fnGenMethod, iPartNo in aaoOutputFiles:
        if sOutFile == '-':
            oOut = sys.stdout;
        else:
            try:
                oOut = open(sOutFile, 'w'); # pylint: disable=consider-using-with,unspecified-encoding
            except Exception as oXcpt:
                print('error! Failed open "%s" for writing: %s' % (sOutFile, oXcpt,), file = sys.stderr);
                return 1;

        # Make sure the file is closed even if generating or writing raises.
        try:
            (fRc2, asLines) = fnGenMethod(iPartNo);
            fRc = fRc2 and fRc;

            oOut.write('\n'.join(asLines));
        finally:
            if oOut is not sys.stdout:
                oOut.close();

    return 0 if fRc else 1;
1049
1050
if __name__ == '__main__':
    #
    # Run the generator.  Should it raise an exception, print a traceback
    # that includes the local variables of each frame, with overly long
    # values truncated, and exit with a failure status.
    #
    try:
        sys.exit(IEMArmGenerator().main(sys.argv));
    except Exception as oXcpt: # pylint: disable=broad-exception-caught
        print('Exception Caught!', flush = True);

        # Limit each variable to cMaxLines terminal lines, falling back on 80
        # columns when the terminal size cannot be determined.
        cMaxLines = 1;
        try:
            cchMaxLen = os.get_terminal_size()[0] * cMaxLines;
        except (AttributeError, ValueError, OSError):
            cchMaxLen = 80 * cMaxLines;
        cchMaxLen -= len(' = ...');

        oTB = traceback.TracebackException.from_exception(oXcpt, limit = None, capture_locals = True);
        # No locals for the outer frame.
        oTB.stack[0].locals = {};
        # Suppress insanely long variable values.
        for oFrameSummary in oTB.stack:
            if oFrameSummary.locals:
                #for sToDelete in ['ddAsmRules', 'aoInstructions',]:
                #    if sToDelete in oFrameSummary.locals:
                #        del oFrameSummary.locals[sToDelete];
                for sKey, sValue in oFrameSummary.locals.items():
                    if len(sValue) > cchMaxLen - len(sKey):
                        sValue = sValue[:cchMaxLen - len(sKey)] + ' ...';
                    if '\n' in sValue:
                        sValue = sValue.split('\n')[0] + ' ...';
                    oFrameSummary.locals[sKey] = sValue;

        # Same output as TracebackException.print(), which is only available
        # in python 3.11 and later.
        sys.stderr.write(''.join(oTB.format()));

        # Do not report success to the caller (build system) after a crash.
        sys.exit(1);
1077
1078
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette