VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65836

Last change on this file since 65836 was 65836, checked in by vboxsync, 8 years ago

IEMAllInstructionsPython.py: Fixes.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 80.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 65836 2017-02-21 17:23:11Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 65836 $"
35
36# Standard python imports.
37import os
38import re
39import sys
40
41# Only the main script needs to modify the path.
42g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
43 'ValidationKit');
44sys.path.append(g_ksValidationKitDir);
45
46from common import utils;
47
48# Python 3 hacks:
49if sys.version_info[0] >= 3:
50 long = int; # pylint: disable=redefined-builtin,invalid-name
51
52
53# Annotation example:
54#
55# \@opmnemonic add
56# \@op1 reg:Eb
57# \@op2 rm:Gb
58# \@opmaps onebyte
59# \@oppfx none
60# \@opcode 0x00
61# \@openc ModR/M
62# \@opfltest none
63# \@opflmodify of,sz,zf,af,pf,cf
64# \@opflundef none
65# \@opflset none
66# \@opflclear none
67# \@ophints harmless
68# \@opstats add_Eb_Gb
69# \@opgroup op_gen_arith_bin
70# \@optest in1=1 in2=1 -> out1=2 outfl=a?,p?
71# \@optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
72
73
## EFLAGS bit and mask values, keyed by the X86_EFL_XXX constant name.
## Single-bit flags are written as shifts so the bit number is visible.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,  # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
}
96
97
98def _isValidOpcodeByte(sOpcode):
99 """
100 Checks if sOpcode is a valid lower case opcode byte.
101 Returns true/false.
102 """
103 if len(sOpcode) == 4:
104 if sOpcode[:2] == '0x':
105 if sOpcode[2] in '0123456789abcdef':
106 if sOpcode[3] in '0123456789abcdef':
107 return True;
108 return False;
109
110
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values (the list values are unused placeholders).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    }
    ## Valid sSelector values (the list values are unused placeholders).
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    }

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        """
        Creates an empty map called sName.

        asLeadOpcodes is an optional list of lead opcode bytes, each formatted
        like '0x0f'; sSelector and sEncoding must be keys of kdSelectors and
        kdEncodings respectively.  All three are checked with assertions.
        """
        assert sSelector in self.kdSelectors
        assert sEncoding in self.kdEncodings
        if asLeadOpcodes is not None:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode)
        else:
            asLeadOpcodes = []

        self.sName          = sName
        self.asLeadOpcodes  = asLeadOpcodes     ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector         ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding         ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: list(Instruction)
152
153
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName = sName
        # Normal value sizes in bytes, ascending.
        self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    }

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by '+' or '-'.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base class
        only ever returns the first form.  The bytearray is little endian.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'

        # Try convert it to an integer.  int() promotes to long on python 2,
        # so no module level 'long' alias is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10)
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Convert to a hex string.  '%x' yields neither a '0x' prefix nor the
        # python 2 'L' suffix.  Negative values are converted to two's
        # complement by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,)
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)])
            # The top bit must be set or the value reads as positive once it
            # is sign extended (e.g. -129 would otherwise come out as 0x7f).
            if sHex[0] in '01234567':
                sHex = 'f' + sHex

        # Work out the natural number of hex digits: twice the smallest normal
        # size that fits, or a multiple of the largest size for huge values.
        cDigits = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                if cDigits <= cb * 2:
                    cNaturalDigits = cb * 2
                    break
        else:
            cNaturalDigits = (cDigits + cMaxDigits - 1) // cMaxDigits * cMaxDigits

        # Pad with the sign digit.
        if cNaturalDigits != cDigits:
            sHex = ('0' if iValue >= 0 else 'f') * (cNaturalDigits - cDigits) + sHex

        # Invert (little endian) and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue
        return False
274
275
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag names which are looked up in
    SimpleParser.kdEFlags.
    """

    ## Flag names that stand for a cleared bit (the dictionary values are unused).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts a comma separated flag list into mask values.

        Returns the TestType.get() result for the bits to set when no flag
        requests clearing, otherwise an (AND mask entry, OR mask entry) pair.

        Raises BadValue on unknown flag names.
        """
        fClear = 0
        fSet   = 0
        for sFlag in sValue.split(','):
            sConstant = SimpleParser.kdEFlags.get(sFlag, None)
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            # A leading '!' on the constant name marks a bit to be cleared.
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]]
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant]

        aoSet = TestType.get(self, '0x%x' % (fSet,))
        if fClear != 0:
            # ~fClear is negative, so this formats as '-0x...' and comes back
            # as the two's complement AND mask.
            aoClear = TestType.get(self, '%#x' % (~fClear,))
            assert self.isAndOrPair(sValue)
            return (aoClear[0], aoSet[0])
        assert not self.isAndOrPair(sValue)
        return aoSet

    def isAndOrPair(self, sValue):
        """
        Checks if sValue names at least one of the kdZeroValueFlags.

        Whole flag names are compared, so a name that merely contains a zero
        value flag as a substring does not count.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True
        return False
323
324
325
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # Signed: without fUnsigned=False this type would behave exactly like 'uint'.
        'int':   TestType('int', fUnsigned = False),
        'efl':   TestTypeEflags('efl'),
    }
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, tbd, )
        # Operands.
        'op1':      ( 'uint', '', ), ## @op1
        'op2':      ( 'uint', '', ), ## @op2
        'op3':      ( 'uint', '', ), ## @op3
        'op4':      ( 'uint', '', ), ## @op4
        # Flags.
        'efl':      ( 'efl',  '', ),
        # 8-bit GPRs.
        'al':       ( 'uint', '', ),
        'cl':       ( 'uint', '', ),
        'dl':       ( 'uint', '', ),
        'bl':       ( 'uint', '', ),
        'ah':       ( 'uint', '', ),
        'ch':       ( 'uint', '', ),
        'dh':       ( 'uint', '', ),
        'bh':       ( 'uint', '', ),
        'r8l':      ( 'uint', '', ),
        'r9l':      ( 'uint', '', ),
        'r10l':     ( 'uint', '', ),
        'r11l':     ( 'uint', '', ),
        'r12l':     ( 'uint', '', ),
        'r13l':     ( 'uint', '', ),
        'r14l':     ( 'uint', '', ),
        'r15l':     ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':       ( 'uint', '', ),
        'dx':       ( 'uint', '', ),
        'cx':       ( 'uint', '', ),
        'bx':       ( 'uint', '', ),
        'sp':       ( 'uint', '', ),
        'bp':       ( 'uint', '', ),
        'si':       ( 'uint', '', ),
        'di':       ( 'uint', '', ),
        'r8w':      ( 'uint', '', ),
        'r9w':      ( 'uint', '', ),
        'r10w':     ( 'uint', '', ),
        'r11w':     ( 'uint', '', ),
        'r12w':     ( 'uint', '', ),
        'r13w':     ( 'uint', '', ),
        'r14w':     ( 'uint', '', ),
        'r15w':     ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':      ( 'uint', '', ),
        'edx':      ( 'uint', '', ),
        'ecx':      ( 'uint', '', ),
        'ebx':      ( 'uint', '', ),
        'esp':      ( 'uint', '', ),
        'ebp':      ( 'uint', '', ),
        'esi':      ( 'uint', '', ),
        'edi':      ( 'uint', '', ),
        'r8d':      ( 'uint', '', ),
        'r9d':      ( 'uint', '', ),
        'r10d':     ( 'uint', '', ),
        'r11d':     ( 'uint', '', ),
        'r12d':     ( 'uint', '', ),
        'r13d':     ( 'uint', '', ),
        'r14d':     ( 'uint', '', ),
        'r15d':     ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':      ( 'uint', '', ),
        'rdx':      ( 'uint', '', ),
        'rcx':      ( 'uint', '', ),
        'rbx':      ( 'uint', '', ),
        'rsp':      ( 'uint', '', ),
        'rbp':      ( 'uint', '', ),
        'rsi':      ( 'uint', '', ),
        'rdi':      ( 'uint', '', ),
        'r8':       ( 'uint', '', ),
        'r9':       ( 'uint', '', ),
        'r10':      ( 'uint', '', ),
        'r11':      ( 'uint', '', ),
        'r12':      ( 'uint', '', ),
        'r13':      ( 'uint', '', ),
        'r14':      ( 'uint', '', ),
        'r15':      ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', '', ),
        'oz.rdx':   ( 'uint', '', ),
        'oz.rcx':   ( 'uint', '', ),
        'oz.rbx':   ( 'uint', '', ),
        'oz.rsp':   ( 'uint', '', ),
        'oz.rbp':   ( 'uint', '', ),
        'oz.rsi':   ( 'uint', '', ),
        'oz.rdi':   ( 'uint', '', ),
        'oz.r8':    ( 'uint', '', ),
        'oz.r9':    ( 'uint', '', ),
        'oz.r10':   ( 'uint', '', ),
        'oz.r11':   ( 'uint', '', ),
        'oz.r12':   ( 'uint', '', ),
        'oz.r13':   ( 'uint', '', ),
        'oz.r14':   ( 'uint', '', ),
        'oz.r15':   ( 'uint', '', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one 'field op value' modifier.

        sField must be a kdFields key and sOp one of kasOperators (both are
        asserted).  sValue and sType are stored as given.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
450
451
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a 'variable op value' triplet; the tables below list what
    is accepted for each part.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':  'paging_on',
            'off': 'paging_off',
        },
    }
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':        'size==o16',
        'o32':        'size==o32',
        'o64':        'size==o64',
        'ring0':      'ring==0',
        '!ring0':     'ring==1..3',
        'ring1':      'ring==1',
        'ring2':      'ring==2',
        'ring3':      'ring==3',
        'user':       'ring==3',
        'supervisor': 'ring==0..2',
        'real':       'mode==real',
        'prot':       'mode==prot',
        'long':       'mode==long',
        'v86':        'mode==v86',
        'smm':        'mode==smm',
        'vmx':        'mode==vmx',
        'svm':        'mode==svm',
        'paging':     'paging==on',
        '!paging':    'paging==off',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the triplet after asserting that sVariable is a kdVariables
        key, sOp is in kasCompareOps and sValue is valid for sVariable.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue
528
529
class InstructionTest(object):
    """
    Instruction test.

    Ties an instruction to lists of inputs, outputs and selectors, all of
    which start out empty.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction) -> None
        self.oInstr      = oInstr   # type: Instruction
        self.aoInputs    = []
        self.aoOutputs   = []
        self.aoSelectors = []       # type: list(TestSelector)
540
541
class Operand(object):
    """
    Instruction operand.

    Described by where it is encoded (sWhere) and its type (sType); both are
    asserted to be keys of the tables below.
    """

    ## @op[1-4] locations (the list values are unused placeholders).
    kdLocations = {
        'reg': [], ## modrm.reg
        'rm':  [], ## modrm.rm
    }

    ## @op[1-4] types (the list values are unused placeholders).
    kdTypes = {
        'Eb': [],
        'Gb': [],
    }

    def __init__(self, sWhere, sType):
        assert sWhere in self.kdLocations
        assert sType in self.kdTypes
        self.sWhere, self.sType = sWhere, sType   ##< kdLocations and kdTypes keys respectively.
564
565
class Instruction(object):
    """
    Instruction.

    A plain record: the constructor only stores the source location and
    gives every other attribute its empty default.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.oCpus          = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        ## @}
621
622
## All the instructions.
g_aoAllInstructions = [] # type: list(Instruction)

## Instruction maps, keyed by map name.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 is selected by modrm.reg for 0x80 just like for 0x81 thru 0x83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
686
687
688
class ParserException(Exception):
    """ Parser exception; the message is handed straight to Exception. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
693
694
695class SimpleParser(object):
696 """
697 Parser of IEMAllInstruction*.cpp.h instruction specifications.
698 """
699
700 ## @name Parser state.
701 ## @{
702 kiCode = 0;
703 kiCommentMulti = 1;
704 ## @}
705
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the name of the g_dInstructionMaps entry to fall back on
    for instructions that do not name a map (asserted to exist).
    """
    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    # Input and position.
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines
    self.iLine          = 0
    self.iState         = self.kiCode
    self.sComment       = ''
    self.iCommentLine   = 0
    self.asCurInstr     = []

    # Statistics.
    self.cTotalInstr    = 0
    self.cTotalStubs    = 0
    self.cTotalTagged   = 0

    # Collected non-fatal errors, see error() and errorComment().
    self.asErrors       = []

    # Debug output is on by default.
    self.fDebug         = True

    # Name validators.
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName   = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')

    # Tag name to handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    }
759
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
765
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,))
771
def error(self, sMessage):
    """
    Queues an error message for the current line in self.asErrors.
    Always returns False so callers can 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
779
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error message in self.asErrors, reporting line number
    self.iCommentLine + iLineInComment.
    Always returns False so callers can 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,))
    return False
787
def printErrors(self):
    """
    Writes all queued error messages to stderr in one go (nothing is
    written when there are none).
    Returns the number of errors.
    """
    cErrors = len(self.asErrors)
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
796
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set,
    otherwise does nothing.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,))
802
803
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it both
    in g_aoAllInstructions and in the current instruction list.

    The instruction's creation line is iLine, or self.iLine when iLine is
    None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine
    oNewInstr = Instruction(self.sSrcFile, iLine)
    g_aoAllInstructions.append(oNewInstr)
    self.asCurInstr.append(oNewInstr)
    return oNewInstr
812
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine (asserting it was not completed
    before), gives it the default map if it has none and appends it to
    every map it belongs to.  Returns True.
    """
    assert oInstr.iLineCompleted is None
    oInstr.iLineCompleted = iLine

    if oInstr.cOpTags > 0:
        #
        # Specified instructions.  Placeholder, nothing is derived yet.
        #
        if oInstr.sStats is None:
            pass
    else:
        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        # Placeholder as well (sRawOldOpcodes and sMnemonic are the candidates).
        pass

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ]
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr)

    sSummary = '%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags)
    self.debug(sSummary)
    return True
848
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction.

    Completes every instruction in self.asCurInstr, updates the stub and
    total counters and resets the comment and current instruction list.
    The completion line is self.iLine, or self.iCommentLine +
    iLineInComment when the latter is given.  Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine
    else:
        iLineDone = self.iCommentLine + iLineInComment

    for oInstr in self.asCurInstr:
        self.doneInstructionOne(oInstr, iLineDone)
        if oInstr.fStub:
            self.cTotalStubs += 1

    self.cTotalInstr += len(self.asCurInstr)

    self.asCurInstr = []
    self.sComment   = ''
    return True
863
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instruction to oValue.  Unless
    fOverwrite is exactly True, only attributes that are currently None
    are replaced.
    """
    for oInstr in self.asCurInstr:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue)
875
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets the iEntry of the array sAttrib of all current instruction to oValue.
    The array is grown with None entries as needed.  Unless fOverwrite is
    exactly True, only entries that are currently None are replaced.
    """
    for oInstr in self.asCurInstr:
        aoArray = getattr(oInstr, sAttrib)
        cMissing = iEntry + 1 - len(aoArray)
        if cMissing > 0:
            aoArray.extend([None] * cMissing)
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue
887
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first line starts with the word 'Opcode' followed by a hex
    byte, everything after 'Opcode' is recorded as sRawOldOpcodes on the
    current instructions (via setInstrunctionAttrib, so existing values are
    kept).  An upper case '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else []
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the 'Opcode' word and keep the opcode bytes and whatever follows.
        asOpcodes = []
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:]
            asOpcodes.append(sWord)
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes))

    return False
903
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds an instruction (created on line self.iCommentLine + iTagLine) if
    there is no current one, then counts the tag on every current
    instruction; an instruction seeing its first tag also bumps
    self.cTotalTagged.  Returns the last current instruction.
    """
    if not self.asCurInstr:
        self.addInstruction(self.iCommentLine + iTagLine)
    for oCurInstr in self.asCurInstr:
        oCurInstr.cOpTags += 1
        if oCurInstr.cOpTags == 1:
            self.cTotalTagged += 1
    return self.asCurInstr[-1]
913
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    aasSections is a list of sections, each a list of lines.  The lines of
    a section are stripped and joined with single spaces; sections without
    any lines are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines]
925
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    aasSections is a list of sections, each a list of lines.  Lines are
    stripped and joined with sLineSep; sections are joined with sSectionSep.
    Sections without any lines are skipped entirely, so they neither add a
    leading nor a doubled separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip()

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections if asLines])
943
944
945
946 ## @name Tag parsers
947 ## @{
948
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    missing final dot is added.  Returns True on success, otherwise the
    problem is queued with errorComment() and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections)
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if sBrief[-1] != '.':
        sBrief = sBrief + '.'
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief))
    # Look for the first dot that ends a sentence, i.e. one followed by a
    # space or sitting at the very end; dots inside words are skipped.
    offDot = sBrief.find('.')
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1)
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief))

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,))
    oInstr.sBrief = sBrief

    _ = iEndLine
    return True
978
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  When self.aoInstructions and aasSections are
    both non-empty, the sections are flattened (one string per section) and
    appended to oInstr.asDescSections.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if self.aoInstructions and aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
993
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic (spelling per the handler name; the tag registration
            itself is not visible from this method)
    Value:  mnemonic

    The value must match self.oReMnemonic.  Because of prefixes, groups and
    whatnot, there are times when the mnemonic isn't that of an actual
    assembler mnemonic.

    The mnemonic is stored in oInstr.sMnemonic, which must not have been set
    already.  Returns True on success, otherwise the result of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out whether there is anything to complain about.
    sMnemonic = self.flattenAllSections(aasSections);
    sError = None;
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic;
    return True;
1016
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in Operand.kdLocations.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in Operand.kdTypes.

    The operand index is taken from the last character of sTag.  The operand
    is stored in oInstr.aoOperands at that index (padding the list with None
    entries as needed) and an existing operand is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(Operand.kdLocations.keys()),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(Operand.kdTypes.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
    return True;
1061
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name (surrounding whitespace is ignored) must be a key in
    g_dInstructionMaps and must not already be assigned to the instruction.
    Valid maps are appended to oInstr.aoMaps.

    Returns True on success, otherwise the result of self.errorComment().
    A map name repeated within the value itself is reported but does not
    fail the tag.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so emptiness is checked on the string.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if len(sFlattened.strip()) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    return True;
1096
## \@oppfx values.  Only the keys are consulted by parseTagOpPfx ('none' is
## handled there separately); each key gets its own empty list as value.
kdPrefixes = dict((sPrefixByte, []) for sPrefixByte in ('0x66', '0xf3', '0xf2'));
1103
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased; a two character value gets '0x' prepended.
    Anything but 'none' must pass _isValidOpcodeByte() and be a key in
    self.kdPrefixes.  'none' is stored as None in oInstr.sPrefix.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
    if len(asPrefixes) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in self.kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, self.kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;
    return True;
1139
## Special \@opcode tag values, i.e. those accepted by parseTagOpcode without
## being an opcode byte.  Each key gets its own empty list as value.
kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
1149
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The flattened value must either be a key in self.kdSpecialOpcodes or be
    accepted by _isValidOpcodeByte().  It is stored in oInstr.sOpcode, which
    must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value (the special values bypass the byte check).
    sOpcode = self.flattenAllSections(aasSections);
    fSpecial = sOpcode in self.kdSpecialOpcodes;
    if not fSpecial and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it, unless already set.
    if oInstr.sOpcode is None:
        oInstr.sOpcode = sOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
1173
## Valid values for \@openc.  Only the keys are consulted by parseTagOpEnc.
kdEncodings = dict((sEncodingName, []) for sEncodingName in ('ModR/M',));
1178
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|TBD

    The instruction operand encoding style.

    The flattened value must be a key in self.kdEncodings.  It is stored in
    oInstr.sEncoding, which must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  Only the known encoding names are
    # acceptable here; opcode bytes are not encodings.
    sEncoding = self.flattenAllSections(aasSections);
    if sEncoding not in self.kdEncodings:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;
    return True;
1203
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Values starting with '!' are the notations for the flag being clear.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No oVerflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    # Note: PF is set on even parity (compare JPE = jump if PF=1, JPO = jump if PF=0).
    'pe':   'X86_EFL_PF',   ##< Parity Even.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.

    'cf':   'X86_EFL_CF',   ##< Carry Flag (same spelling as the reference manual notation).
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('cf' is already listed above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1250
## EFlags tag to Instruction attribute name (used by parseTagOpEFlags to
## pick the attribute to read and assign).
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
1259
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of self.kdEFlags keys, or the single
    word 'none' (any case) for an empty list.  Every invalid flag is
    reported before giving up.  The resulting list is assigned to the
    instruction attribute named by self.kdOpFlagToAttr[sTag], which must
    still be None.

    Returns True on success, False if a flag was rejected, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in self.kdEFlags:
                continue;
            sStripped = sFlag.strip();
            if sStripped in self.kdEFlags:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to replace an earlier value.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
1291
## \@ophints values.  Maps the hint keyword to the DISOPTYPE_XXX name.
kdHints = {
    'invalid':                  'DISOPTYPE_INVALID',
    'harmless':                 'DISOPTYPE_HARMLESS',
    'controlflow':              'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous':    'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':                'DISOPTYPE_DANGEROUS',
    'portio':                   'DISOPTYPE_PORTIO',
    'privileged':               'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':        'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':       'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':     'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':         'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':                'DISOPTYPE_INTERRUPT',
    'illegal':                  'DISOPTYPE_ILLEGAL',
    ## Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous':            'DISOPTYPE_RRM_DANGEROUS',
    ## Some additional dangerous ones when recompiling 16-bit raw r0.
    'rrm_dangerous_16':         'DISOPTYPE_RRM_DANGEROUS_16',
    ## Will or can inhibit irqs (sti, pop ss, mov ss).
    'inhibit_irqs':             'DISOPTYPE_INHIBIT_IRQS',
    'portio_read':              'DISOPTYPE_PORTIO_READ',
    'portio_write':             'DISOPTYPE_PORTIO_WRITE',
    ## Invalid in 64 bits mode.
    'invalid_64':               'DISOPTYPE_INVALID_64',
    ## Only valid in 64 bits mode.
    'only_64':                  'DISOPTYPE_ONLY_64',
    ## Default 64 bits operand size.
    'default_64_op_size':       'DISOPTYPE_DEFAULT_64_OP_SIZE',
    ## Forced 64 bits operand size; regardless of prefix bytes.
    'forced_64_op_size':        'DISOPTYPE_FORCED_64_OP_SIZE',
    ## REX.B extends the register field in the opcode byte.
    'rexb_extends_opreg':       'DISOPTYPE_REXB_EXTENDS_OPREG',
    ## modrm.mod is always 11b.
    'mod_fixed_11':             'DISOPTYPE_MOD_FIXED_11',
    ## Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!).
    'forced_32_op_size_x86':    'DISOPTYPE_FORCED_32_OP_SIZE_X86',
    ## SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'sse':                      'DISOPTYPE_SSE',
    ## MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'mmx':                      'DISOPTYPE_MMX',
    ## FPU instruction. Not implemented yet!
    'fpu':                      'DISOPTYPE_FPU',
};
1323
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in self.kdHints; the single word 'none' (any
    case) gives an empty list.  Every invalid hint is reported before
    giving up.  Valid hints are added as keys to oInstr.dHints; a hint that
    is already present is reported but does not fail the tag.

    Returns True on success, False if a hint was rejected.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (str.split() without arguments yields whitespace-free words, so no
    # further stripping is necessary.)
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    return True;
1357
## \@opcpuid values.  Maps the CPUID feature keyword to the X86_CPUID_XXX
## feature bit name.
kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # Each keyword names its own feature bit, not the neighbouring one
    # (DTES64, CPLDS and TM2 are different features).
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1401
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of self.kdCpuIdFlags keys;
    the single word 'none' (any case) gives an empty list.  Every invalid
    entry is reported before giving up.  Valid entries are appended to
    oInstr.asCpuIds; an entry that is already present is reported but does
    not fail the tag.

    Returns True on success, False if an entry was rejected.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (str.split() without arguments yields whitespace-free words, so no
    # further stripping is necessary.)
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    return True;
1435
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName.  It is
    stored in oInstr.sGroup, which must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value; exactly one well-formed group name is expected.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if self.oReGroupName.match(sGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Set it, unless already set.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
1461
## \@opunused, \@opinvalid, \@opinvlstyle values.  Only the keys are
## consulted by parseTagOpUnusedInvalid.
kdInvalidStyles = dict([
    ## CPU stops decoding immediately after the opcode.
    ('immediate',                []),
    ## Intel decodes ModR/M.
    ('intel-modrm',              []),
    ## Intel decodes ModR/M and an 8-bit immediate (imm8).
    ('intel-modrm-imm8',         []),
    ## Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    ('intel-opcode-modrm',       []),
    ## Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
    ('intel-opcode-modrm-imm8',  []),
]);
1470
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one word and a key in self.kdInvalidStyles.
    It is stored in oInstr.sInvlStyle, which must not have been set already;
    oInstr.fUnused or oInstr.fInvalid is set to True for the \@opunused and
    \@opinvalid tags respectively.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                           % (sTag, sStyle, self.kdInvalidStyles.keys(),));
    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    return True;
1508
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optest
    Value:      [<selectors> / ] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value describes one test.  The words of a section
    are scanned from the end: everything after '->' is an output, everything
    between '/' and '->' an input, and everything before '/' a selector.
    Tests which parse cleanly are appended to oInstr.aoTests; problems are
    reported via self.errorComment().

    Always returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list is compared by
            # identity: the lists may well hold equal content (e.g. all
            # still empty), which must not hide a repeated separator.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional; the field's first kdFields entry is used when omitted.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with the "="'
                                                                    % ( sTag, sDesc, sItem, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                                % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                         % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed cleanly, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    return True;
1647
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReFunctionName.  It is stored in
    oInstr.sFunction, which must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;
    return True;
1675
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReStatsName.  It is stored in oInstr.sStats,
    which must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;
    return True;
1703
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag carries no
    value, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1716
1717 ## @}
1718
1719
def parseComment(self):
    """
    Parses the current comment (self.sComment).

    The comment is split into lines with leading blanks and asterisks
    removed.  Old style 'Opcode ...' comments are handed on to
    self.parseCommentOldOpcode().  The lines are then scanned for '@' tags:
    the text following a tag (split into sections at blank lines) is passed
    to the handler registered for it in self.dTagHandlers, unknown '@op*'
    tags are reported, and other tags are ignored.  Untagged text ahead of
    the first tag is only tolerated when no handled tag is present.

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore skipped, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if sComment.find('Opcode') < 0 and sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Leading empty lines are dropped while advancing self.iCommentLine,
    # trailing empty lines are simply dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text ahead of the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text belonging to the current tag; an empty line closes a non-empty section.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag.
        #
        if sCurTag in self.dTagHandlers:
            fnHandler = self.dTagHandlers[sCurTag];
            fnHandler(sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);

        #
        # New tag.  Whatever follows the tag on the line starts its first section.
        #
        asSplit      = sLine.split(None, 1);
        sCurTag      = asSplit[0].lower();
        asCurSection = asSplit[1:];
        aasSections  = [ asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Process the final tag.
    #
    if sCurTag in self.dTagHandlers:
        fnHandler = self.dTagHandlers[sCurTag];
        fnHandler(sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and len(sFlatDefault) > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1812
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    isn't found in it, following lines are appended from self.asLines,
    starting at index self.iLine (self.iLine itself is not modified).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  Arguments are stripped; nested parentheses
    are kept as part of the argument they occur in.

    On an invalid macro name or when running out of lines, the result of
    self.error() is returned instead.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): raiseError is not defined in the visible part of the
        # file - confirm that it is in scope here.
        raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off'.  This
        # must loop since an appended line may be empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1854
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is directly
    followed (ignoring whitespace) by an opening parenthesis, parses it.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found, with the offset adjusted to be relative to the start of sCode.
    """
    offHit = sCode.find(sMacro);
    # lstrip().startswith() rather than strip()[0]: the latter raises
    # IndexError when nothing but whitespace follows the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
1864
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and returns the result for the
    first one that findAndParseMacroInvocation locates in sCode.

    Returns (len(sCode), None) if none of them is found.
    """
    # Lazy generator, so no further lookups are done once one has matched.
    aoAttempts = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    for offEnd, asParsed in aoAttempts:
        if asParsed is not None:
            return (offEnd, asParsed);
    return (len(sCode), None);
1874
def checkCodeForMacro(self, sCode):
    """
    Inspects one piece of code for the macro invocations the parser uses to
    pick up instruction details.

    Returns True when a decoder function definition/stub macro was handled,
    False otherwise (including when only the IEMOP_* macros were seen).
    """
    # Without a '(' past the first column there is nothing to scan.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asFnMacros = [ 'FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1' ];
    (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode, asFnMacros);
    if asArgs is not None:
        sMacroName = asArgs[0];
        sFunction  = asArgs[1];
        fIsStub    = sMacroName.find('STUB') > 0;

        # No instruction pending: create one named after the function.
        if len(self.asCurInstr) == 0:
            self.addInstruction().sMnemonic = sFunction.split('_')[1];
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacroName.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # A stub has no body to scan, so the instruction is complete.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.asCurInstr) == 1:
        self.setInstrunctionAttrib('sStats', asArgs[1]);
        self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

    #
    # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
    # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
    #
    for sHlpMacro, cDisParams in (('IEMOP_HLP_DECODED_NL_1', 1), ('IEMOP_HLP_DECODED_NL_2', 2)):
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, sHlpMacro);
        if asArgs is not None and len(self.asCurInstr) == 1:
            self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
            self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
            for iParam in range(cDisParams):
                self.setInstrunctionArrayAttrib('asRawDisParams', iParam, asArgs[3 + iParam]);

    return False;
1927
1928
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards.  Multi-line (/* ... */)
    comments are accumulated in self.sComment and each completed comment is
    handed to parseComment().  Code lines without any slash are passed to
    checkCodeForMacro() when they contain 'IEMOP_', and a line starting with
    '}' completes the current instructions.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # startswith() rather than sLine[0] so an empty line cannot raise
        # IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1990
1991
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and runs SimpleParser over its lines to collect
    the instruction specifications.

    Returns what SimpleParser.parse() returns (the error count).
    Raises Exception when the file cannot be opened or read.  A
    ParserException from the parser is printed and then re-raised; any other
    exception propagates unchanged.
    """
    #
    # Slurp the file into a list of lines, closing it whatever happens.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2022
2023
def __parseAll():
    """
    Parses the instruction source files listed below, which are expected to
    sit in the same directory as this script.

    Returns True when no parse errors were counted.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default map, file name) pairs to process.
    atInputs = [
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    cErrors = sum(__parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
                  for sDefaultMap, sName in atInputs);
    if cErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cErrors,));
2041
2042
2043
# Unconditional module-level call: the instruction files are parsed whenever
# this file is run or imported, and an exception is raised on parse errors.
__parseAll();
2045
2046
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette